From ed69becf3f95d1f841d3504f8b50e02b80bd04d6 Mon Sep 17 00:00:00 2001
From: Sayan Shaw <sayanshaw@microsoft.com>
Date: Tue, 14 May 2024 15:28:35 -0700
Subject: [PATCH 1/3] update opencv build to just include libspng and libjpeg
 libraries

---
 cmake/externals/cvout/.gitkeep         |    0
 cmake/externals/libjpeg/CMakeLists.txt |   30 +
 cmake/externals/libjpeg/README         |  371 ++
 cmake/externals/libjpeg/change.log     |  502 ++
 cmake/externals/libjpeg/jaricom.c      |  153 +
 cmake/externals/libjpeg/jcapimin.c     |  288 +
 cmake/externals/libjpeg/jcapistd.c     |  162 +
 cmake/externals/libjpeg/jcarith.c      |  945 ++++
 cmake/externals/libjpeg/jccoefct.c     |  456 ++
 cmake/externals/libjpeg/jccolor.c      |  598 ++
 cmake/externals/libjpeg/jcdctmgr.c     |  466 ++
 cmake/externals/libjpeg/jchuff.c       | 1656 ++++++
 cmake/externals/libjpeg/jcinit.c       |  249 +
 cmake/externals/libjpeg/jcmainct.c     |  297 +
 cmake/externals/libjpeg/jcmarker.c     |  717 +++
 cmake/externals/libjpeg/jcmaster.c     |  675 +++
 cmake/externals/libjpeg/jcomapi.c      |  244 +
 cmake/externals/libjpeg/jconfig.h      |   64 +
 cmake/externals/libjpeg/jcparam.c      |  591 ++
 cmake/externals/libjpeg/jcprepct.c     |  358 ++
 cmake/externals/libjpeg/jcsample.c     |  545 ++
 cmake/externals/libjpeg/jctrans.c      |  399 ++
 cmake/externals/libjpeg/jdapimin.c     |  412 ++
 cmake/externals/libjpeg/jdapistd.c     |  276 +
 cmake/externals/libjpeg/jdarith.c      |  796 +++
 cmake/externals/libjpeg/jdatadst.c     |  263 +
 cmake/externals/libjpeg/jdatasrc.c     |  271 +
 cmake/externals/libjpeg/jdcoefct.c     |  744 +++
 cmake/externals/libjpeg/jdcolor.c      |  769 +++
 cmake/externals/libjpeg/jdct.h         |  409 ++
 cmake/externals/libjpeg/jddctmgr.c     |  384 ++
 cmake/externals/libjpeg/jdhuff.c       | 1559 ++++++
 cmake/externals/libjpeg/jdinput.c      |  657 +++
 cmake/externals/libjpeg/jdmainct.c     |  511 ++
 cmake/externals/libjpeg/jdmarker.c     | 1505 +++++
 cmake/externals/libjpeg/jdmaster.c     |  532 ++
 cmake/externals/libjpeg/jdmerge.c      |  437 ++
 cmake/externals/libjpeg/jdpostct.c     |  290 +
 cmake/externals/libjpeg/jdsample.c     |  341 ++
 cmake/externals/libjpeg/jdtrans.c      |  140 +
 cmake/externals/libjpeg/jerror.c       |  253 +
 cmake/externals/libjpeg/jerror.h       |  304 ++
 cmake/externals/libjpeg/jfdctflt.c     |  176 +
 cmake/externals/libjpeg/jfdctfst.c     |  232 +
 cmake/externals/libjpeg/jfdctint.c     | 4415 +++++++++++++++
 cmake/externals/libjpeg/jidctflt.c     |  238 +
 cmake/externals/libjpeg/jidctfst.c     |  351 ++
 cmake/externals/libjpeg/jidctint.c     | 5240 ++++++++++++++++++
 cmake/externals/libjpeg/jinclude.h     |  157 +
 cmake/externals/libjpeg/jmemansi.c     |  167 +
 cmake/externals/libjpeg/jmemmgr.c      | 1115 ++++
 cmake/externals/libjpeg/jmemnobs.c     |  113 +
 cmake/externals/libjpeg/jmemsys.h      |  198 +
 cmake/externals/libjpeg/jmorecfg.h     |  457 ++
 cmake/externals/libjpeg/jpegint.h      |  445 ++
 cmake/externals/libjpeg/jpeglib.h      | 1183 ++++
 cmake/externals/libjpeg/jquant1.c      |  851 +++
 cmake/externals/libjpeg/jquant2.c      | 1311 +++++
 cmake/externals/libjpeg/jutils.c       |  224 +
 cmake/externals/libjpeg/jversion.h     |   14 +
 cmake/externals/libspng/CMakeLists.txt |   39 +
 cmake/externals/libspng/LICENSE        |   25 +
 cmake/externals/libspng/spng.c         | 6980 ++++++++++++++++++++++++
 cmake/externals/libspng/spng.h         |  537 ++
 cmake/externals/opencv.cmake           |  175 +-
 65 files changed, 45099 insertions(+), 163 deletions(-)
 create mode 100644 cmake/externals/cvout/.gitkeep
 create mode 100644 cmake/externals/libjpeg/CMakeLists.txt
 create mode 100644 cmake/externals/libjpeg/README
 create mode 100644 cmake/externals/libjpeg/change.log
 create mode 100644 cmake/externals/libjpeg/jaricom.c
 create mode 100644 cmake/externals/libjpeg/jcapimin.c
 create mode 100644 cmake/externals/libjpeg/jcapistd.c
 create mode 100644 cmake/externals/libjpeg/jcarith.c
 create mode 100644 cmake/externals/libjpeg/jccoefct.c
 create mode 100644 cmake/externals/libjpeg/jccolor.c
 create mode 100644 cmake/externals/libjpeg/jcdctmgr.c
 create mode 100644 cmake/externals/libjpeg/jchuff.c
 create mode 100644 cmake/externals/libjpeg/jcinit.c
 create mode 100644 cmake/externals/libjpeg/jcmainct.c
 create mode 100644 cmake/externals/libjpeg/jcmarker.c
 create mode 100644 cmake/externals/libjpeg/jcmaster.c
 create mode 100644 cmake/externals/libjpeg/jcomapi.c
 create mode 100644 cmake/externals/libjpeg/jconfig.h
 create mode 100644 cmake/externals/libjpeg/jcparam.c
 create mode 100644 cmake/externals/libjpeg/jcprepct.c
 create mode 100644 cmake/externals/libjpeg/jcsample.c
 create mode 100644 cmake/externals/libjpeg/jctrans.c
 create mode 100644 cmake/externals/libjpeg/jdapimin.c
 create mode 100644 cmake/externals/libjpeg/jdapistd.c
 create mode 100644 cmake/externals/libjpeg/jdarith.c
 create mode 100644 cmake/externals/libjpeg/jdatadst.c
 create mode 100644 cmake/externals/libjpeg/jdatasrc.c
 create mode 100644 cmake/externals/libjpeg/jdcoefct.c
 create mode 100644 cmake/externals/libjpeg/jdcolor.c
 create mode 100644 cmake/externals/libjpeg/jdct.h
 create mode 100644 cmake/externals/libjpeg/jddctmgr.c
 create mode 100644 cmake/externals/libjpeg/jdhuff.c
 create mode 100644 cmake/externals/libjpeg/jdinput.c
 create mode 100644 cmake/externals/libjpeg/jdmainct.c
 create mode 100644 cmake/externals/libjpeg/jdmarker.c
 create mode 100644 cmake/externals/libjpeg/jdmaster.c
 create mode 100644 cmake/externals/libjpeg/jdmerge.c
 create mode 100644 cmake/externals/libjpeg/jdpostct.c
 create mode 100644 cmake/externals/libjpeg/jdsample.c
 create mode 100644 cmake/externals/libjpeg/jdtrans.c
 create mode 100644 cmake/externals/libjpeg/jerror.c
 create mode 100644 cmake/externals/libjpeg/jerror.h
 create mode 100644 cmake/externals/libjpeg/jfdctflt.c
 create mode 100644 cmake/externals/libjpeg/jfdctfst.c
 create mode 100644 cmake/externals/libjpeg/jfdctint.c
 create mode 100644 cmake/externals/libjpeg/jidctflt.c
 create mode 100644 cmake/externals/libjpeg/jidctfst.c
 create mode 100644 cmake/externals/libjpeg/jidctint.c
 create mode 100644 cmake/externals/libjpeg/jinclude.h
 create mode 100644 cmake/externals/libjpeg/jmemansi.c
 create mode 100644 cmake/externals/libjpeg/jmemmgr.c
 create mode 100644 cmake/externals/libjpeg/jmemnobs.c
 create mode 100644 cmake/externals/libjpeg/jmemsys.h
 create mode 100644 cmake/externals/libjpeg/jmorecfg.h
 create mode 100644 cmake/externals/libjpeg/jpegint.h
 create mode 100644 cmake/externals/libjpeg/jpeglib.h
 create mode 100644 cmake/externals/libjpeg/jquant1.c
 create mode 100644 cmake/externals/libjpeg/jquant2.c
 create mode 100644 cmake/externals/libjpeg/jutils.c
 create mode 100644 cmake/externals/libjpeg/jversion.h
 create mode 100644 cmake/externals/libspng/CMakeLists.txt
 create mode 100644 cmake/externals/libspng/LICENSE
 create mode 100644 cmake/externals/libspng/spng.c
 create mode 100644 cmake/externals/libspng/spng.h

diff --git a/cmake/externals/cvout/.gitkeep b/cmake/externals/cvout/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/cmake/externals/libjpeg/CMakeLists.txt b/cmake/externals/libjpeg/CMakeLists.txt
new file mode 100644
index 000000000..4e88164cc
--- /dev/null
+++ b/cmake/externals/libjpeg/CMakeLists.txt
@@ -0,0 +1,30 @@
+# ----------------------------------------------------------------------------
+#  CMake file for libjpeg. See root CMakeLists.txt
+#
+# ----------------------------------------------------------------------------
+project(JPEG_LIBRARY)
+
+file(GLOB lib_srcs *.c)
+file(GLOB lib_hdrs *.h)
+
+# ----------------------------------------------------------------------------------
+#         Define the library target:
+# ----------------------------------------------------------------------------------
+
+add_library(JPEG_LIBRARY STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs})
+
+if(CV_GCC OR CV_CLANG)
+  set_source_files_properties(jcdctmgr.c PROPERTIES COMPILE_FLAGS "-O1")
+endif()
+
+set_target_properties(JPEG_LIBRARY
+  PROPERTIES OUTPUT_NAME JPEG_LIBRARY
+  DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+  COMPILE_PDB_NAME JPEG_LIBRARY
+  COMPILE_PDB_NAME_DEBUG "JPEG_LIBRARY${OPENCV_DEBUG_POSTFIX}"
+  ARCHIVE_OUTPUT_DIRECTORY "${3P_LIBRARY_OUTPUT_PATH}"
+  )
+
+if(ENABLE_SOLUTION_FOLDERS)
+  set_target_properties(JPEG_LIBRARY PROPERTIES FOLDER "3rdparty")
+endif()
diff --git a/cmake/externals/libjpeg/README b/cmake/externals/libjpeg/README
new file mode 100644
index 000000000..13b245c5c
--- /dev/null
+++ b/cmake/externals/libjpeg/README
@@ -0,0 +1,371 @@
+The Independent JPEG Group's JPEG software
+==========================================
+
+README for release 9d of 12-Jan-2020
+====================================
+
+This distribution contains the ninth public release of the Independent JPEG
+Group's free JPEG software.  You are welcome to redistribute this software and
+to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
+
+This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
+Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
+John Korejwa, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi,
+Ge' Weijers, and other members of the Independent JPEG Group.
+
+IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
+(previously known as JPEG, together with ITU-T SG16).
+
+
+DOCUMENTATION ROADMAP
+=====================
+
+This file contains the following sections:
+
+OVERVIEW            General description of JPEG and the IJG software.
+LEGAL ISSUES        Copyright, lack of warranty, terms of distribution.
+REFERENCES          Where to learn more about JPEG.
+ARCHIVE LOCATIONS   Where to find newer versions of this software.
+ACKNOWLEDGMENTS     Special thanks.
+FILE FORMAT WARS    Software *not* to get.
+TO DO               Plans for future IJG releases.
+
+Other documentation files in the distribution are:
+
+User documentation:
+  install.txt       How to configure and install the IJG software.
+  usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
+                    rdjpgcom, and wrjpgcom.
+  *.1               Unix-style man pages for programs (same info as usage.txt).
+  wizard.txt        Advanced usage instructions for JPEG wizards only.
+  change.log        Version-to-version change highlights.
+Programmer and internal documentation:
+  libjpeg.txt       How to use the JPEG library in your own programs.
+  example.c         Sample code for calling the JPEG library.
+  structure.txt     Overview of the JPEG library's internal structure.
+  filelist.txt      Road map of IJG files.
+  coderules.txt     Coding style rules --- please read if you contribute code.
+
+Please read at least the files install.txt and usage.txt.  Some information
+can also be found in the JPEG FAQ (Frequently Asked Questions) article.  See
+ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
+
+If you want to understand how the JPEG code works, we suggest reading one or
+more of the REFERENCES, then looking at the documentation files (in roughly
+the order listed) before diving into the code.
+
+
+OVERVIEW
+========
+
+This package contains C software to implement JPEG image encoding, decoding,
+and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
+method for full-color and grayscale images.
+
+This software implements JPEG baseline, extended-sequential, and progressive
+compression processes.  Provision is made for supporting all variants of these
+processes, although some uncommon parameter settings aren't implemented yet.
+We have made no provision for supporting the hierarchical or lossless
+processes defined in the standard.
+
+We provide a set of library routines for reading and writing JPEG image files,
+plus two sample applications "cjpeg" and "djpeg", which use the library to
+perform conversion between JPEG and some other popular image file formats.
+The library is intended to be reused in other applications.
+
+In order to support file conversion and viewing software, we have included
+considerable functionality beyond the bare JPEG coding/decoding capability;
+for example, the color quantization modules are not strictly part of JPEG
+decoding, but they are essential for output to colormapped file formats or
+colormapped displays.  These extra functions can be compiled out of the
+library if not required for a particular application.
+
+We have also included "jpegtran", a utility for lossless transcoding between
+different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
+applications for inserting and extracting textual comments in JFIF files.
+
+The emphasis in designing this software has been on achieving portability and
+flexibility, while also making it fast enough to be useful.  In particular,
+the software is not intended to be read as a tutorial on JPEG.  (See the
+REFERENCES section for introductory material.)  Rather, it is intended to
+be reliable, portable, industrial-strength code.  We do not claim to have
+achieved that goal in every aspect of the software, but we strive for it.
+
+We welcome the use of this software as a component of commercial products.
+No royalty is required, but we do ask for an acknowledgement in product
+documentation, as described under LEGAL ISSUES.
+
+
+LEGAL ISSUES
+============
+
+In plain English:
+
+1. We don't promise that this software works.  (But if you find any bugs,
+   please let us know!)
+2. You can use this software for whatever you want.  You don't have to pay us.
+3. You may not pretend that you wrote this software.  If you use it in a
+   program, you must acknowledge somewhere in your documentation that
+   you've used the IJG code.
+
+In legalese:
+
+The authors make NO WARRANTY or representation, either express or implied,
+with respect to this software, its quality, accuracy, merchantability, or
+fitness for a particular purpose.  This software is provided "AS IS", and you,
+its user, assume the entire risk as to its quality and accuracy.
+
+This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
+All Rights Reserved except as specified below.
+
+Permission is hereby granted to use, copy, modify, and distribute this
+software (or portions thereof) for any purpose, without fee, subject to these
+conditions:
+(1) If any part of the source code for this software is distributed, then this
+README file must be included, with this copyright and no-warranty notice
+unaltered; and any additions, deletions, or changes to the original files
+must be clearly indicated in accompanying documentation.
+(2) If only executable code is distributed, then the accompanying
+documentation must state that "this software is based in part on the work of
+the Independent JPEG Group".
+(3) Permission for use of this software is granted only if the user accepts
+full responsibility for any undesirable consequences; the authors accept
+NO LIABILITY for damages of any kind.
+
+These conditions apply to any software derived from or based on the IJG code,
+not just to the unmodified library.  If you use our work, you ought to
+acknowledge us.
+
+Permission is NOT granted for the use of any IJG author's name or company name
+in advertising or publicity relating to this software or products derived from
+it.  This software may be referred to only as "the Independent JPEG Group's
+software".
+
+We specifically permit and encourage the use of this software as the basis of
+commercial products, provided that all warranty or liability claims are
+assumed by the product vendor.
+
+
+The Unix configuration script "configure" was produced with GNU Autoconf.
+It is copyright by the Free Software Foundation but is freely distributable.
+The same holds for its supporting scripts (config.guess, config.sub,
+ltmain.sh).  Another support script, install-sh, is copyright by X Consortium
+but is also freely distributable.
+
+
+REFERENCES
+==========
+
+We recommend reading one or more of these references before trying to
+understand the innards of the JPEG software.
+
+The best short technical introduction to the JPEG compression algorithm is
+	Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
+	Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44.
+(Adjacent articles in that issue discuss MPEG motion picture compression,
+applications of JPEG, and related topics.)  If you don't have the CACM issue
+handy, a PDF file containing a revised version of Wallace's article is
+available at http://www.ijg.org/files/Wallace.JPEG.pdf.  The file (actually
+a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
+omits the sample images that appeared in CACM, but it includes corrections
+and some added material.  Note: the Wallace article is copyright ACM and IEEE,
+and it may not be used for commercial purposes.
+
+A somewhat less technical, more leisurely introduction to JPEG can be found in
+"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by
+M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1.  This book provides
+good explanations and example C code for a multitude of compression methods
+including JPEG.  It is an excellent source if you are comfortable reading C
+code but don't know much about data compression in general.  The book's JPEG
+sample code is far from industrial-strength, but when you are ready to look
+at a full implementation, you've got one here...
+
+The best currently available description of JPEG is the textbook "JPEG Still
+Image Data Compression Standard" by William B. Pennebaker and Joan L.
+Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
+Price US$59.95, 638 pp.  The book includes the complete text of the ISO JPEG
+standards (DIS 10918-1 and draft DIS 10918-2).
+Although this is by far the most detailed and comprehensive exposition of
+JPEG publicly available, we point out that it is still missing an explanation
+of the most essential properties and algorithms of the underlying DCT
+technology.
+If you think that you know about DCT-based JPEG after reading this book,
+then you are in delusion.  The real fundamentals and corresponding potential
+of DCT-based JPEG are not publicly known so far, and that is the reason for
+all the mistaken developments taking place in the image coding domain.
+
+The original JPEG standard is divided into two parts, Part 1 being the actual
+specification, while Part 2 covers compliance testing methods.  Part 1 is
+titled "Digital Compression and Coding of Continuous-tone Still Images,
+Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
+10918-1, ITU-T T.81.  Part 2 is titled "Digital Compression and Coding of
+Continuous-tone Still Images, Part 2: Compliance testing" and has document
+numbers ISO/IEC IS 10918-2, ITU-T T.83.
+IJG JPEG 8 introduced an implementation of the JPEG SmartScale extension
+which is specified in two documents:  A contributed document at ITU and ISO
+with title "ITU-T JPEG-Plus Proposal for Extending ITU-T T.81 for Advanced
+Image Coding", April 2006, Geneva, Switzerland.  The latest version of this
+document is Revision 3.  And a contributed document ISO/IEC JTC1/SC29/WG1 N
+5799 with title "Evolution of JPEG", June/July 2011, Berlin, Germany.
+IJG JPEG 9 introduces a reversible color transform for improved lossless
+compression which is described in a contributed document ISO/IEC JTC1/SC29/
+WG1 N 6080 with title "JPEG 9 Lossless Coding", June/July 2012, Paris,
+France.
+
+The JPEG standard does not specify all details of an interchangeable file
+format.  For the omitted details we follow the "JFIF" conventions, version 2.
+JFIF version 1 has been adopted as Recommendation ITU-T T.871 (05/2011) :
+Information technology - Digital compression and coding of continuous-tone
+still images: JPEG File Interchange Format (JFIF).  It is available as a
+free download in PDF file format from http://www.itu.int/rec/T-REC-T.871.
+A PDF file of the older JFIF document is available at
+http://www.w3.org/Graphics/JPEG/jfif3.pdf.
+
+The TIFF 6.0 file format specification can be obtained by FTP from
+ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation scheme
+found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
+IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
+Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
+(Compression tag 7).  Copies of this Note can be obtained from
+http://www.ijg.org/files/.  It is expected that the next revision
+of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
+Although IJG's own code does not support TIFF/JPEG, the free libtiff library
+uses our library to implement TIFF/JPEG per the Note.
+
+
+ARCHIVE LOCATIONS
+=================
+
+The "official" archive site for this software is www.ijg.org.
+The most recent released version can always be found there in
+directory "files".  This particular version will be archived as
+http://www.ijg.org/files/jpegsrc.v9d.tar.gz, and in Windows-compatible
+"zip" archive format as http://www.ijg.org/files/jpegsr9d.zip.
+
+The JPEG FAQ (Frequently Asked Questions) article is a source of some
+general information about JPEG.
+It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
+and other news.answers archive sites, including the official news.answers
+archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
+If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu
+with body
+	send usenet/news.answers/jpeg-faq/part1
+	send usenet/news.answers/jpeg-faq/part2
+
+
+ACKNOWLEDGMENTS
+===============
+
+Thank to Juergen Bruder for providing me with a copy of the common DCT
+algorithm article, only to find out that I had come to the same result
+in a more direct and comprehensible way with a more generative approach.
+
+Thank to Istvan Sebestyen and Joan L. Mitchell for inviting me to the
+ITU JPEG (Study Group 16) meeting in Geneva, Switzerland.
+
+Thank to Thomas Wiegand and Gary Sullivan for inviting me to the
+Joint Video Team (MPEG & ITU) meeting in Geneva, Switzerland.
+
+Thank to Thomas Richter and Daniel Lee for inviting me to the
+ISO/IEC JTC1/SC29/WG1 (previously known as JPEG, together with ITU-T SG16)
+meeting in Berlin, Germany.
+
+Thank to John Korejwa and Massimo Ballerini for inviting me to
+fruitful consultations in Boston, MA and Milan, Italy.
+
+Thank to Hendrik Elstner, Roland Fassauer, Simone Zuck, Guenther
+Maier-Gerber, Walter Stoeber, Fred Schmitz, and Norbert Braunagel
+for corresponding business development.
+
+Thank to Nico Zschach and Dirk Stelling of the technical support team
+at the Digital Images company in Halle for providing me with extra
+equipment for configuration tests.
+
+Thank to Richard F. Lyon (then of Foveon Inc.) for fruitful
+communication about JPEG configuration in Sigma Photo Pro software.
+
+Thank to Andrew Finkenstadt for hosting the ijg.org site.
+
+Thank to Thomas G. Lane for the original design and development of
+this singular software package.
+
+Thank to Lars Goehler, Andreas Heinecke, Sebastian Fuss, Yvonne Roebert,
+Andrej Werner, and Ulf-Dietrich Braumann for support and public relations.
+
+
+FILE FORMAT WARS
+================
+
+The ISO/IEC JTC1/SC29/WG1 standards committee (previously known as JPEG,
+together with ITU-T SG16) currently promotes different formats containing
+the name "JPEG" which is misleading because these formats are incompatible
+with original DCT-based JPEG and are based on faulty technologies.
+IJG therefore does not and will not support such momentary mistakes
+(see REFERENCES).
+There exist also distributions under the name "OpenJPEG" promoting such
+kind of formats which is misleading because they don't support original
+JPEG images.
+We have no sympathy for the promotion of inferior formats.  Indeed, one of
+the original reasons for developing this free software was to help force
+convergence on common, interoperable format standards for JPEG files.
+Don't use an incompatible file format!
+(In any case, our decoder will remain capable of reading existing JPEG
+image files indefinitely.)
+
+The ISO committee pretends to be "responsible for the popular JPEG" in their
+public reports which is not true because they don't respond to actual
+requirements for the maintenance of the original JPEG specification.
+Furthermore, the ISO committee pretends to "ensure interoperability" with
+their standards which is not true because their "standards" support only
+application-specific and proprietary use cases and contain mathematically
+incorrect code.
+
+There are currently different distributions in circulation containing the
+name "libjpeg" which is misleading because they don't have the features and
+are incompatible with formats supported by actual IJG libjpeg distributions.
+One of those fakes is released by members of the ISO committee and just uses
+the name of libjpeg for misdirection of people, similar to the abuse of the
+name JPEG as described above, while having nothing in common with actual IJG
+libjpeg distributions and containing mathematically incorrect code.
+The other one claims to be a "derivative" or "fork" of the original libjpeg,
+but violates the license conditions as described under LEGAL ISSUES above
+and violates basic C programming properties.
+We have no sympathy for the release of misleading, incorrect and illegal
+distributions derived from obsolete code bases.
+Don't use an obsolete code base!
+
+According to the UCC (Uniform Commercial Code) law, IJG has the lawful and
+legal right to foreclose on certain standardization bodies and other
+institutions or corporations that knowingly perform substantial and
+systematic deceptive acts and practices, fraud, theft, and damaging of the
+value of the people of this planet without their knowing, willing and
+intentional consent.
+The titles, ownership, and rights of these institutions and all their assets
+are now duly secured and held in trust for the free people of this planet.
+People of the planet, on every country, may have a financial interest in
+the assets of these former principals, agents, and beneficiaries of the
+foreclosed institutions and corporations.
+IJG asserts what is: that each man, woman, and child has unalienable value
+and rights granted and deposited in them by the Creator and not any one of
+the people is subordinate to any artificial principality, corporate fiction
+or the special interest of another without their appropriate knowing,
+willing and intentional consent made by contract or accommodation agreement.
+IJG expresses that which already was.
+The people have already determined and demanded that public administration
+entities, national governments, and their supporting judicial systems must
+be fully transparent, accountable, and liable.
+IJG has secured the value for all concerned free people of the planet.
+
+A partial list of foreclosed institutions and corporations ("Hall of Shame")
+is currently prepared and will be published later.
+
+
+TO DO
+=====
+
+Version 9 is the second release of a new generation JPEG standard
+to overcome the limitations of the original JPEG specification,
+and is the first true source reference JPEG codec.
+More features are being prepared for coming releases...
+
+Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
diff --git a/cmake/externals/libjpeg/change.log b/cmake/externals/libjpeg/change.log
new file mode 100644
index 000000000..ed8ec6afc
--- /dev/null
+++ b/cmake/externals/libjpeg/change.log
@@ -0,0 +1,502 @@
+CHANGE LOG for Independent JPEG Group's JPEG software
+
+
+Version 9d  12-Jan-2020
+-----------------------
+
+Optimize the optimal Huffman code table generation to produce
+slightly smaller files.  Thank to John Korejwa for suggestion.
+Note: Requires rebuild of testimgp.jpg.
+
+Decoding Huffman: Use default tables if tables are not defined.
+Thank to Simone Azzalin for report (Motion JPEG),
+and to Martin Strunz for hint.
+
+Add sanity check in optimal Huffman code table generation.
+Thank to Adam Farley for suggestion.
+
+rdtarga.c: use read_byte(), with EOF check, instead of getc()
+in read_*_pixel().
+Thank to Chijin Zhou for cjpeg potential vulnerability report.
+
+jmemnobs.c: respect the max_memory_to_use setting in
+jpeg_mem_available() computation.  Thank to Sheng Shu and
+Dongdong She for djpeg potential vulnerability report.
+
+jdarith.c, jdhuff.c: avoid left shift of negative value
+compiler warning in decode_mcu_AC_refine().
+Thank to Indu Bhagat for suggestion.
+
+Add x64 (64-bit) platform support, avoid compiler warnings.
+Thank to Jonathan Potter, Feiyun Wang, and Sheng Shu for suggestion.
+
+Adjust libjpeg version specification for pkg-config file.
+Thank to Chen Chen for suggestion.
+
+Restore GIF read and write support from libjpeg version 6a.
+Thank to Wolfgang Werner (W.W.) Heinz for suggestion.
+
+Improve consistency in raw (downsampled) image data processing mode.
+Thank to Zhongyuan Zhou for hint.
+
+Avoid out of bounds array read (AC derived table pointers)
+in start pass in jdhuff.c.  Thank to Peng Li for report.
+
+Improve code sanity (jdhuff.c).
+Thank to Reza Mirzazade farkhani for reports.
+
+Add jpegtran -drop option; add options to the crop extension and wipe
+to fill the extra area with content from the source image region,
+instead of gray out.
+
+
+Version 9c  14-Jan-2018
+-----------------------
+
+jpegtran: add an option to the -wipe switch to fill the region
+with the average of adjacent blocks, instead of gray out.
+Thank to Caitlyn Feddock and Maddie Ziegler for inspiration.
+
+Make range extension bits adjustable (in jpegint.h).
+Thank to Robin Watts for suggestion.
+
+Provide macros for fflush() and ferror() in jinclude.h in order
+to facilitate adaption by applications using an own FILE class.
+Thank to Gerhard Huber for suggestion.
+
+Add libjpeg pkg-config file.  Thank to Mark Lavi, Vincent Torri,
+Patrick McMunn, and Huw Davies for suggestion.
+
+Add sanity checks in cjpeg image reader modules.
+Thank to Bingchang, Liu for reports.
+
+
+Version 9b  17-Jan-2016
+-----------------------
+
+Improvements and optimizations in DCT and color calculations.
+Normalize range limit array composition and access pattern.
+Thank to Sia Furler and Maddie Ziegler for inspiration.
+
+Use merged upsample with scaled DCT sizes larger than 8.
+Thank to Taylor Hatala for inspiration.
+
+Check for excessive comment lengths in argument parsing in wrjpgcom.c.
+Thank to Julian Cohen for hint.
+
+Add makefile.b32 for use with Borland C++ 32-bit (bcc32).
+Thank to Joe Slater for contribution.
+
+Document 'f' specifier for jpegtran -crop specification.
+Thank to Michele Martone for suggestion.
+
+Use defined value from header instead of hardwired number in rdswitch.c.
+Thank to Robert Sprowson for hint.
+
+
+Version 9a  19-Jan-2014
+-----------------------
+
+Add support for wide gamut color spaces (JFIF version 2).
+Improve clarity and accuracy in color conversion modules.
+Note: Requires rebuild of test images.
+
+Extend the bit depth support to all values from 8 to 12
+(BITS_IN_JSAMPLE configuration option in jmorecfg.h).
+jpegtran now supports N bits sample data precision with all N from 8 to 12
+in a single instance.  Thank to Roland Fassauer for inspiration.
+
+Try to resolve issues with new boolean type definition.
+Thank also to v4hn for suggestion.
+
+Enable option to use default Huffman tables for lossless compression
+(for hardware solution), and in this case improve lossless RGB compression
+with reversible color transform.  Thank to Benny Alexandar for hint.
+
+Extend the entropy decoding structure, so that extraneous bytes between
+compressed scan data and following marker can be reported correctly.
+Thank to Nigel Tao for hint.
+
+Add jpegtran -wipe option and extension for -crop.
+Thank to Andrew Senior, David Clunie, and Josef Schmid for suggestion.
+
+
+Version 9  13-Jan-2013
+----------------------
+
+Add cjpeg -rgb1 option to create an RGB JPEG file, and insert
+a simple reversible color transform into the processing which
+significantly improves the compression.
+The recommended command for lossless coding of RGB images is now
+cjpeg -rgb1 -block 1 -arithmetic.
+As said, this option improves the compression significantly, but
+the files are not compatible with JPEG decoders prior to IJG v9
+due to the included color transform.
+The used color transform and marker signaling is compatible with
+other JPEG standards (e.g., JPEG-LS part 2).
+
+Remove the automatic de-ANSI-fication support (Automake 1.12).
+Thank also to Nitin A Kamble for suggestion.
+
+Add remark for jpeg_mem_dest() in jdatadst.c.
+Thank to Elie-Gregoire Khoury for the hint.
+
+Support files with invalid component identifiers (created
+by Adobe PDF).  Thank to Robin Watts for the suggestion.
+
+Adapt full buffer case in jcmainct.c for use with scaled DCT.
+Thank to Sergii Biloshytskyi for the suggestion.
+
+Add type identifier for declaration of noreturn functions.
+Thank to Brett L. Moore for the suggestion.
+
+Correct argument type in format string, avoid compiler warnings.
+Thank to Vincent Torri for hint.
+
+Add missing #include directives in configuration checks, avoid
+configuration errors.  Thank to John Spencer for the hint.
+
+
+Version 8d  15-Jan-2012
+-----------------------
+
+Add cjpeg -rgb option to create RGB JPEG files.
+Using this switch suppresses the conversion from RGB
+colorspace input to the default YCbCr JPEG colorspace.
+This feature allows true lossless JPEG coding of RGB color images.
+The recommended command for this purpose is currently
+cjpeg -rgb -block 1 -arithmetic.
+SmartScale capable decoder (introduced with IJG JPEG 8) required.
+Thank to Michael Koch for the initial suggestion.
+
+Add option to disable the region adjustment in the transupp crop code.
+Thank to Jeffrey Friedl for the suggestion.
+
+Thank to Richard Jones and Edd Dawson for various minor corrections.
+
+Thank to Akim Demaille for configure.ac cleanup.
+
+
+Version 8c  16-Jan-2011
+-----------------------
+
+Add option to compression library and cjpeg (-block N) to use
+different DCT block size.
+All N from 1 to 16 are possible.  Default is 8 (baseline format).
+Larger values produce higher compression,
+smaller values produce higher quality.
+SmartScale capable decoder (introduced with IJG JPEG 8) required.
+
+
+Version 8b  16-May-2010
+-----------------------
+
+Repair problem in new memory source manager with corrupt JPEG data.
+Thank to Ted Campbell and Samuel Chun for the report.
+
+Repair problem in Makefile.am test target.
+Thank to anonymous user for the report.
+
+Support MinGW installation with automatic configure.
+Thank to Volker Grabsch for the suggestion.
+
+
+Version 8a  28-Feb-2010
+-----------------------
+
+Writing tables-only datastreams via jpeg_write_tables works again.
+
+Support 32-bit BMPs (RGB image with Alpha channel) for read in cjpeg.
+Thank to Brett Blackham for the suggestion.
+
+Improve accuracy in floating point IDCT calculation.
+Thank to Robert Hooke for the hint.
+
+
+Version 8  10-Jan-2010
+----------------------
+
+jpegtran now supports the same -scale option as djpeg for "lossless" resize.
+An implementation of the JPEG SmartScale extension is required for this
+feature.  A (draft) specification of the JPEG SmartScale extension is
+available as a contributed document at ITU and ISO.  Revision 2 or later
+of the document is required (latest document version is Revision 3).
+The SmartScale extension will enable more features beside lossless resize
+in future implementations, as described in the document (new compression
+options).
+
+Add sanity check in BMP reader module to avoid cjpeg crash for empty input
+image (thank to Isaev Ildar of ISP RAS, Moscow, RU for reporting this error).
+
+Add data source and destination managers for read from and write to
+memory buffers.  New API functions jpeg_mem_src and jpeg_mem_dest.
+Thank to Roberto Boni from Italy for the suggestion.
+
+
+Version 7  27-Jun-2009
+----------------------
+
+New scaled DCTs implemented.
+djpeg now supports scalings N/8 with all N from 1 to 16.
+cjpeg now supports scalings 8/N with all N from 1 to 16.
+Scaled DCTs with size larger than 8 are now also used for resolving the
+common 2x2 chroma subsampling case without additional spatial resampling.
+Separate spatial resampling for those kind of files is now only necessary
+for N>8 scaling cases.
+Furthermore, separate scaled DCT functions are provided for direct resolving
+of the common asymmetric subsampling cases (2x1 and 1x2) without additional
+spatial resampling.
+
+cjpeg -quality option has been extended for support of separate quality
+settings for luminance and chrominance (or in general, for every provided
+quantization table slot).
+New API function jpeg_default_qtables() and q_scale_factor array in library.
+
+Added -nosmooth option to cjpeg, complementary to djpeg.
+New variable "do_fancy_downsampling" in library, complement to fancy
+upsampling.  Fancy upsampling now uses direct DCT scaling with sizes
+larger than 8.  The old method is not reversible and has been removed.
+
+Support arithmetic entropy encoding and decoding.
+Added files jaricom.c, jcarith.c, jdarith.c.
+
+Straighten the file structure:
+Removed files jidctred.c, jcphuff.c, jchuff.h, jdphuff.c, jdhuff.h.
+
+jpegtran has a new "lossless" cropping feature.
+
+Implement -perfect option in jpegtran, new API function
+jtransform_perfect_transform() in transupp. (DP 204_perfect.dpatch)
+
+Better error messages for jpegtran fopen failure.
+(DP 203_jpegtran_errmsg.dpatch)
+
+Fix byte order issue with 16bit PPM/PGM files in rdppm.c/wrppm.c:
+according to Netpbm, the de facto standard implementation of the PNM formats,
+the most significant byte is first. (DP 203_rdppm.dpatch)
+
+Add -raw option to rdjpgcom not to mangle the output.
+(DP 205_rdjpgcom_raw.dpatch)
+
+Make rdjpgcom locale aware. (DP 201_rdjpgcom_locale.dpatch)
+
+Add extern "C" to jpeglib.h.
+This avoids the need to put extern "C" { ... } around #include "jpeglib.h"
+in your C++ application.  Defining the symbol DONT_USE_EXTERN_C in the
+configuration prevents this. (DP 202_jpeglib.h_c++.dpatch)
+
+
+Version 6b  27-Mar-1998
+-----------------------
+
+jpegtran has new features for lossless image transformations (rotation
+and flipping) as well as "lossless" reduction to grayscale.
+
+jpegtran now copies comments by default; it has a -copy switch to enable
+copying all APPn blocks as well, or to suppress comments.  (Formerly it
+always suppressed comments and APPn blocks.)  jpegtran now also preserves
+JFIF version and resolution information.
+
+New decompressor library feature: COM and APPn markers found in the input
+file can be saved in memory for later use by the application.  (Before,
+you had to code this up yourself with a custom marker processor.)
+
+There is an unused field "void * client_data" now in compress and decompress
+parameter structs; this may be useful in some applications.
+
+JFIF version number information is now saved by the decoder and accepted by
+the encoder.  jpegtran uses this to copy the source file's version number,
+to ensure "jpegtran -copy all" won't create bogus files that contain JFXX
+extensions but claim to be version 1.01.  Applications that generate their
+own JFXX extension markers also (finally) have a supported way to cause the
+encoder to emit JFIF version number 1.02.
+
+djpeg's trace mode reports JFIF 1.02 thumbnail images as such, rather
+than as unknown APP0 markers.
+
+In -verbose mode, djpeg and rdjpgcom will try to print the contents of
+APP12 markers as text.  Some digital cameras store useful text information
+in APP12 markers.
+
+Handling of truncated data streams is more robust: blocks beyond the one in
+which the error occurs will be output as uniform gray, or left unchanged
+if decoding a progressive JPEG.  The appearance no longer depends on the
+Huffman tables being used.
+
+Huffman tables are checked for validity much more carefully than before.
+
+To avoid the Unisys LZW patent, djpeg's GIF output capability has been
+changed to produce "uncompressed GIFs", and cjpeg's GIF input capability
+has been removed altogether.  We're not happy about it either, but there
+seems to be no good alternative.
+
+The configure script now supports building libjpeg as a shared library
+on many flavors of Unix (all the ones that GNU libtool knows how to
+build shared libraries for).  Use "./configure --enable-shared" to
+try this out.
+
+New jconfig file and makefiles for Microsoft Visual C++ and Developer Studio.
+Also, a jconfig file and a build script for Metrowerks CodeWarrior
+on Apple Macintosh.  makefile.dj has been updated for DJGPP v2, and there
+are miscellaneous other minor improvements in the makefiles.
+
+jmemmac.c now knows how to create temporary files following Mac System 7
+conventions.
+
+djpeg's -map switch is now able to read raw-format PPM files reliably.
+
+cjpeg -progressive -restart no longer generates any unnecessary DRI markers.
+
+Multiple calls to jpeg_simple_progression for a single JPEG object
+no longer leak memory.
+
+
+Version 6a  7-Feb-96
+--------------------
+
+Library initialization sequence modified to detect version mismatches
+and struct field packing mismatches between library and calling application.
+This change requires applications to be recompiled, but does not require
+any application source code change.
+
+All routine declarations changed to the style "GLOBAL(type) name ...",
+that is, GLOBAL, LOCAL, METHODDEF, EXTERN are now macros taking the
+routine's return type as an argument.  This makes it possible to add
+Microsoft-style linkage keywords to all the routines by changing just
+these macros.  Note that any application code that was using these macros
+will have to be changed.
+
+DCT coefficient quantization tables are now stored in normal array order
+rather than zigzag order.  Application code that calls jpeg_add_quant_table,
+or otherwise manipulates quantization tables directly, will need to be
+changed.  If you need to make such code work with either older or newer
+versions of the library, a test like "#if JPEG_LIB_VERSION >= 61" is
+recommended.
+
+djpeg's trace capability now dumps DQT tables in natural order, not zigzag
+order.  This allows the trace output to be made into a "-qtables" file
+more easily.
+
+New system-dependent memory manager module for use on Apple Macintosh.
+
+Fix bug in cjpeg's -smooth option: last one or two scanlines would be
+duplicates of the prior line unless the image height mod 16 was 1 or 2.
+
+Repair minor problems in VMS, BCC, MC6 makefiles.
+
+New configure script based on latest GNU Autoconf.
+
+Correct the list of include files needed by MetroWerks C for ccommand().
+
+Numerous small documentation updates.
+
+
+Version 6  2-Aug-95
+-------------------
+
+Progressive JPEG support: library can read and write full progressive JPEG
+files.  A "buffered image" mode supports incremental decoding for on-the-fly
+display of progressive images.  Simply recompiling an existing IJG-v5-based
+decoder with v6 should allow it to read progressive files, though of course
+without any special progressive display.
+
+New "jpegtran" application performs lossless transcoding between different
+JPEG formats; primarily, it can be used to convert baseline to progressive
+JPEG and vice versa.  In support of jpegtran, the library now allows lossless
+reading and writing of JPEG files as DCT coefficient arrays.  This ability
+may be of use in other applications.
+
+Notes for programmers:
+* We changed jpeg_start_decompress() to be able to suspend; this makes all
+decoding modes available to suspending-input applications.  However,
+existing applications that use suspending input will need to be changed
+to check the return value from jpeg_start_decompress().  You don't need to
+do anything if you don't use a suspending data source.
+* We changed the interface to the virtual array routines: access_virt_array
+routines now take a count of the number of rows to access this time.  The
+last parameter to request_virt_array routines is now interpreted as the
+maximum number of rows that may be accessed at once, but not necessarily
+the height of every access.
+
+
+Version 5b  15-Mar-95
+---------------------
+
+Correct bugs with grayscale images having v_samp_factor > 1.
+
+jpeg_write_raw_data() now supports output suspension.
+
+Correct bugs in "configure" script for case of compiling in
+a directory other than the one containing the source files.
+
+Repair bug in jquant1.c: sometimes didn't use as many colors as it could.
+
+Borland C makefile and jconfig file work under either MS-DOS or OS/2.
+
+Miscellaneous improvements to documentation.
+
+
+Version 5a  7-Dec-94
+--------------------
+
+Changed color conversion roundoff behavior so that grayscale values are
+represented exactly.  (This causes test image files to change.)
+
+Make ordered dither use 16x16 instead of 4x4 pattern for a small quality
+improvement.
+
+New configure script based on latest GNU Autoconf.
+Fix configure script to handle CFLAGS correctly.
+Rename *.auto files to *.cfg, so that configure script still works if
+file names have been truncated for DOS.
+
+Fix bug in rdbmp.c: didn't allow for extra data between header and image.
+
+Modify rdppm.c/wrppm.c to handle 2-byte raw PPM/PGM formats for 12-bit data.
+
+Fix several bugs in rdrle.c.
+
+NEED_SHORT_EXTERNAL_NAMES option was broken.
+
+Revise jerror.h/jerror.c for more flexibility in message table.
+
+Repair oversight in jmemname.c NO_MKTEMP case: file could be there
+but unreadable.
+
+
+Version 5  24-Sep-94
+--------------------
+
+Version 5 represents a nearly complete redesign and rewrite of the IJG
+software.  Major user-visible changes include:
+  * Automatic configuration simplifies installation for most Unix systems.
+  * A range of speed vs. image quality tradeoffs are supported.
+    This includes resizing of an image during decompression: scaling down
+    by a factor of 1/2, 1/4, or 1/8 is handled very efficiently.
+  * New programs rdjpgcom and wrjpgcom allow insertion and extraction
+    of text comments in a JPEG file.
+
+The application programmer's interface to the library has changed completely.
+Notable improvements include:
+  * We have eliminated the use of callback routines for handling the
+    uncompressed image data.  The application now sees the library as a
+    set of routines that it calls to read or write image data on a
+    scanline-by-scanline basis.
+  * The application image data is represented in a conventional interleaved-
+    pixel format, rather than as a separate array for each color channel.
+    This can save a copying step in many programs.
+  * The handling of compressed data has been cleaned up: the application can
+    supply routines to source or sink the compressed data.  It is possible to
+    suspend processing on source/sink buffer overrun, although this is not
+    supported in all operating modes.
+  * All static state has been eliminated from the library, so that multiple
+    instances of compression or decompression can be active concurrently.
+  * JPEG abbreviated datastream formats are supported, ie, quantization and
+    Huffman tables can be stored separately from the image data.
+  * And not only that, but the documentation of the library has improved
+    considerably!
+
+
+The last widely used release before the version 5 rewrite was version 4A of
+18-Feb-93.  Change logs before that point have been discarded, since they
+are not of much interest after the rewrite.
diff --git a/cmake/externals/libjpeg/jaricom.c b/cmake/externals/libjpeg/jaricom.c
new file mode 100644
index 000000000..690068861
--- /dev/null
+++ b/cmake/externals/libjpeg/jaricom.c
@@ -0,0 +1,153 @@
+/*
+ * jaricom.c
+ *
+ * Developed 1997-2011 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains probability estimation tables for common use in
+ * arithmetic entropy encoding and decoding routines.
+ *
+ * This data represents Table D.3 in the JPEG spec (D.2 in the draft),
+ * ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81, and Table 24
+ * in the JBIG spec, ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* The following #define specifies the packing of the four components
+ * into the compact INT32 representation.
+ * Note that this formula must match the actual arithmetic encoder
+ * and decoder implementation.  The implementation has to be changed
+ * if this formula is changed.
+ * The current organization is leaned on Markus Kuhn's JBIG
+ * implementation (jbig_tab.c).
+ */
+
+#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b)
+
+const INT32 jpeg_aritab[113+1] = {
+/*
+ * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
+ */
+  V(   0, 0x5a1d,   1,   1, 1 ),
+  V(   1, 0x2586,  14,   2, 0 ),
+  V(   2, 0x1114,  16,   3, 0 ),
+  V(   3, 0x080b,  18,   4, 0 ),
+  V(   4, 0x03d8,  20,   5, 0 ),
+  V(   5, 0x01da,  23,   6, 0 ),
+  V(   6, 0x00e5,  25,   7, 0 ),
+  V(   7, 0x006f,  28,   8, 0 ),
+  V(   8, 0x0036,  30,   9, 0 ),
+  V(   9, 0x001a,  33,  10, 0 ),
+  V(  10, 0x000d,  35,  11, 0 ),
+  V(  11, 0x0006,   9,  12, 0 ),
+  V(  12, 0x0003,  10,  13, 0 ),
+  V(  13, 0x0001,  12,  13, 0 ),
+  V(  14, 0x5a7f,  15,  15, 1 ),
+  V(  15, 0x3f25,  36,  16, 0 ),
+  V(  16, 0x2cf2,  38,  17, 0 ),
+  V(  17, 0x207c,  39,  18, 0 ),
+  V(  18, 0x17b9,  40,  19, 0 ),
+  V(  19, 0x1182,  42,  20, 0 ),
+  V(  20, 0x0cef,  43,  21, 0 ),
+  V(  21, 0x09a1,  45,  22, 0 ),
+  V(  22, 0x072f,  46,  23, 0 ),
+  V(  23, 0x055c,  48,  24, 0 ),
+  V(  24, 0x0406,  49,  25, 0 ),
+  V(  25, 0x0303,  51,  26, 0 ),
+  V(  26, 0x0240,  52,  27, 0 ),
+  V(  27, 0x01b1,  54,  28, 0 ),
+  V(  28, 0x0144,  56,  29, 0 ),
+  V(  29, 0x00f5,  57,  30, 0 ),
+  V(  30, 0x00b7,  59,  31, 0 ),
+  V(  31, 0x008a,  60,  32, 0 ),
+  V(  32, 0x0068,  62,  33, 0 ),
+  V(  33, 0x004e,  63,  34, 0 ),
+  V(  34, 0x003b,  32,  35, 0 ),
+  V(  35, 0x002c,  33,   9, 0 ),
+  V(  36, 0x5ae1,  37,  37, 1 ),
+  V(  37, 0x484c,  64,  38, 0 ),
+  V(  38, 0x3a0d,  65,  39, 0 ),
+  V(  39, 0x2ef1,  67,  40, 0 ),
+  V(  40, 0x261f,  68,  41, 0 ),
+  V(  41, 0x1f33,  69,  42, 0 ),
+  V(  42, 0x19a8,  70,  43, 0 ),
+  V(  43, 0x1518,  72,  44, 0 ),
+  V(  44, 0x1177,  73,  45, 0 ),
+  V(  45, 0x0e74,  74,  46, 0 ),
+  V(  46, 0x0bfb,  75,  47, 0 ),
+  V(  47, 0x09f8,  77,  48, 0 ),
+  V(  48, 0x0861,  78,  49, 0 ),
+  V(  49, 0x0706,  79,  50, 0 ),
+  V(  50, 0x05cd,  48,  51, 0 ),
+  V(  51, 0x04de,  50,  52, 0 ),
+  V(  52, 0x040f,  50,  53, 0 ),
+  V(  53, 0x0363,  51,  54, 0 ),
+  V(  54, 0x02d4,  52,  55, 0 ),
+  V(  55, 0x025c,  53,  56, 0 ),
+  V(  56, 0x01f8,  54,  57, 0 ),
+  V(  57, 0x01a4,  55,  58, 0 ),
+  V(  58, 0x0160,  56,  59, 0 ),
+  V(  59, 0x0125,  57,  60, 0 ),
+  V(  60, 0x00f6,  58,  61, 0 ),
+  V(  61, 0x00cb,  59,  62, 0 ),
+  V(  62, 0x00ab,  61,  63, 0 ),
+  V(  63, 0x008f,  61,  32, 0 ),
+  V(  64, 0x5b12,  65,  65, 1 ),
+  V(  65, 0x4d04,  80,  66, 0 ),
+  V(  66, 0x412c,  81,  67, 0 ),
+  V(  67, 0x37d8,  82,  68, 0 ),
+  V(  68, 0x2fe8,  83,  69, 0 ),
+  V(  69, 0x293c,  84,  70, 0 ),
+  V(  70, 0x2379,  86,  71, 0 ),
+  V(  71, 0x1edf,  87,  72, 0 ),
+  V(  72, 0x1aa9,  87,  73, 0 ),
+  V(  73, 0x174e,  72,  74, 0 ),
+  V(  74, 0x1424,  72,  75, 0 ),
+  V(  75, 0x119c,  74,  76, 0 ),
+  V(  76, 0x0f6b,  74,  77, 0 ),
+  V(  77, 0x0d51,  75,  78, 0 ),
+  V(  78, 0x0bb6,  77,  79, 0 ),
+  V(  79, 0x0a40,  77,  48, 0 ),
+  V(  80, 0x5832,  80,  81, 1 ),
+  V(  81, 0x4d1c,  88,  82, 0 ),
+  V(  82, 0x438e,  89,  83, 0 ),
+  V(  83, 0x3bdd,  90,  84, 0 ),
+  V(  84, 0x34ee,  91,  85, 0 ),
+  V(  85, 0x2eae,  92,  86, 0 ),
+  V(  86, 0x299a,  93,  87, 0 ),
+  V(  87, 0x2516,  86,  71, 0 ),
+  V(  88, 0x5570,  88,  89, 1 ),
+  V(  89, 0x4ca9,  95,  90, 0 ),
+  V(  90, 0x44d9,  96,  91, 0 ),
+  V(  91, 0x3e22,  97,  92, 0 ),
+  V(  92, 0x3824,  99,  93, 0 ),
+  V(  93, 0x32b4,  99,  94, 0 ),
+  V(  94, 0x2e17,  93,  86, 0 ),
+  V(  95, 0x56a8,  95,  96, 1 ),
+  V(  96, 0x4f46, 101,  97, 0 ),
+  V(  97, 0x47e5, 102,  98, 0 ),
+  V(  98, 0x41cf, 103,  99, 0 ),
+  V(  99, 0x3c3d, 104, 100, 0 ),
+  V( 100, 0x375e,  99,  93, 0 ),
+  V( 101, 0x5231, 105, 102, 0 ),
+  V( 102, 0x4c0f, 106, 103, 0 ),
+  V( 103, 0x4639, 107, 104, 0 ),
+  V( 104, 0x415e, 103,  99, 0 ),
+  V( 105, 0x5627, 105, 106, 1 ),
+  V( 106, 0x50e7, 108, 107, 0 ),
+  V( 107, 0x4b85, 109, 103, 0 ),
+  V( 108, 0x5597, 110, 109, 0 ),
+  V( 109, 0x504f, 111, 107, 0 ),
+  V( 110, 0x5a10, 110, 111, 1 ),
+  V( 111, 0x5522, 112, 109, 0 ),
+  V( 112, 0x59eb, 112, 111, 1 ),
+/*
+ * This last entry is used for fixed probability estimate of 0.5
+ * as suggested in Section 10.3 Table 5 of ITU-T Rec. T.851.
+ */
+  V( 113, 0x5a1d, 113, 113, 0 )
+};
diff --git a/cmake/externals/libjpeg/jcapimin.c b/cmake/externals/libjpeg/jcapimin.c
new file mode 100644
index 000000000..639ce86f4
--- /dev/null
+++ b/cmake/externals/libjpeg/jcapimin.c
@@ -0,0 +1,288 @@
+/*
+ * jcapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-compression case or the transcoding-only
+ * case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jcapistd.c.  But also see jcparam.c for
+ * parameter-setup helper routines, jcomapi.c for routines shared by
+ * compression and decompression, and jctrans.c for the transcoding case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG compression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_compress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = FALSE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->dest = NULL;
+
+  cinfo->comp_info = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    cinfo->quant_tbl_ptrs[i] = NULL;
+    cinfo->q_scale_factor[i] = 100;
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  /* Must do it here for emit_dqt in case jpeg_write_tables is used */
+  cinfo->block_size = DCTSIZE;
+  cinfo->natural_order = jpeg_natural_order;
+  cinfo->lim_Se = DCTSIZE2-1;
+
+  cinfo->script_space = NULL;
+
+  cinfo->input_gamma = 1.0;	/* in case application forgets */
+
+  /* OK, I'm ready */
+  cinfo->global_state = CSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG compression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_compress (j_compress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG compression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_compress (j_compress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Forcibly suppress or un-suppress all quantization and Huffman tables.
+ * Marks all currently defined tables as already written (if suppress)
+ * or not written (if !suppress).  This will control whether they get emitted
+ * by a subsequent jpeg_start_compress call.
+ *
+ * This routine is exported for use by applications that want to produce
+ * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
+ * since it is called by jpeg_start_compress, we put it here --- otherwise
+ * jcparam.o would be linked whether the application used it or not.
+ */
+
+GLOBAL(void)
+jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
+{
+  int i;
+  JQUANT_TBL * qtbl;
+  JHUFF_TBL * htbl;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)
+      qtbl->sent_table = suppress;
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+  }
+}
+
+
+/*
+ * Finish JPEG compression.
+ *
+ * If a multipass operating mode was selected, this may do a great deal of
+ * work including most of the actual output.
+ */
+
+GLOBAL(void)
+jpeg_finish_compress (j_compress_ptr cinfo)
+{
+  JDIMENSION iMCU_row;
+
+  if (cinfo->global_state == CSTATE_SCANNING ||
+      cinfo->global_state == CSTATE_RAW_OK) {
+    /* Terminate first pass */
+    if (cinfo->next_scanline < cinfo->image_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_pass) (cinfo);
+  } else if (cinfo->global_state != CSTATE_WRCOEFS)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any remaining passes */
+  while (! cinfo->master->is_last_pass) {
+    (*cinfo->master->prepare_for_pass) (cinfo);
+    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) iMCU_row;
+	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* We bypass the main controller and invoke coef controller directly;
+       * all work is being done from the coefficient buffer.
+       */
+      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
+	ERREXIT(cinfo, JERR_CANT_SUSPEND);
+    }
+    (*cinfo->master->finish_pass) (cinfo);
+  }
+  /* Write EOI, do final cleanup */
+  (*cinfo->marker->write_file_trailer) (cinfo);
+  (*cinfo->dest->term_destination) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+}
+
+
+/*
+ * Write a special marker.
+ * This is only recommended for writing COM or APPn markers.
+ * Must be called after jpeg_start_compress() and before
+ * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
+ */
+
+GLOBAL(void)
+jpeg_write_marker (j_compress_ptr cinfo, int marker,
+		   const JOCTET *dataptr, unsigned int datalen)
+{
+  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
+
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
+  while (datalen--) {
+    (*write_marker_byte) (cinfo, *dataptr);
+    dataptr++;
+  }
+}
+
+/* Same, but piecemeal. */
+
+GLOBAL(void)
+jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+{
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+}
+
+GLOBAL(void)
+jpeg_write_m_byte (j_compress_ptr cinfo, int val)
+{
+  (*cinfo->marker->write_marker_byte) (cinfo, val);
+}
+
+
+/*
+ * Alternate compression function: just write an abbreviated table file.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * To produce a pair of files containing abbreviated tables and abbreviated
+ * image data, one would proceed as follows:
+ *
+ *		initialize JPEG object
+ *		set JPEG parameters
+ *		set destination to table file
+ *		jpeg_write_tables(cinfo);
+ *		set destination to image file
+ *		jpeg_start_compress(cinfo, FALSE);
+ *		write data...
+ *		jpeg_finish_compress(cinfo);
+ *
+ * jpeg_write_tables has the side effect of marking all tables written
+ * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
+ * will not re-emit the tables unless it is passed write_all_tables=TRUE.
+ */
+
+GLOBAL(void)
+jpeg_write_tables (j_compress_ptr cinfo)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Initialize the marker writer ... bit of a crock to do it here. */
+  jinit_marker_writer(cinfo);
+  /* Write them tables! */
+  (*cinfo->marker->write_tables_only) (cinfo);
+  /* And clean up. */
+  (*cinfo->dest->term_destination) (cinfo);
+  /*
+   * In library releases up through v6a, we called jpeg_abort() here to free
+   * any working memory allocated by the destination manager and marker
+   * writer.  Some applications had a problem with that: they allocated space
+   * of their own from the library memory manager, and didn't want it to go
+   * away during write_tables.  So now we do nothing.  This will cause a
+   * memory leak if an app calls write_tables repeatedly without doing a full
+   * compression cycle or otherwise resetting the JPEG object.  However, that
+   * seems less bad than unexpectedly freeing memory in the normal case.
+   * An app that prefers the old behavior can call jpeg_abort for itself after
+   * each call to jpeg_write_tables().
+   */
+}
diff --git a/cmake/externals/libjpeg/jcapistd.c b/cmake/externals/libjpeg/jcapistd.c
new file mode 100644
index 000000000..0917afa97
--- /dev/null
+++ b/cmake/externals/libjpeg/jcapistd.c
@@ -0,0 +1,162 @@
+/*
+ * jcapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-compression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_compress, it will end up linking in the entire compressor.
+ * We thus must separate this file from jcapimin.c to avoid linking the
+ * whole compression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Compression initialization.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * We require a write_all_tables parameter as a failsafe check when writing
+ * multiple datastreams from the same compression object.  Since prior runs
+ * will have left all the tables marked sent_table=TRUE, a subsequent run
+ * would emit an abbreviated stream (no tables) by default.  This may be what
+ * is wanted, but for safety's sake it should not be the default behavior:
+ * programmers should have to make a deliberate choice to emit abbreviated
+ * images.  Therefore the documentation and examples should encourage people
+ * to pass write_all_tables=TRUE; then it will take active thought to do the
+ * wrong thing.
+ */
+
+GLOBAL(void)
+jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (write_all_tables)
+    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  jinit_compress_master(cinfo);
+  /* Set up for the first pass */
+  (*cinfo->master->prepare_for_pass) (cinfo);
+  /* Ready for application to drive first pass through jpeg_write_scanlines
+   * or jpeg_write_raw_data.
+   */
+  cinfo->next_scanline = 0;
+  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
+}
+
+
+/*
+ * Write some scanlines of data to the JPEG compressor.
+ *
+ * The return value will be the number of lines actually written.
+ * This should be less than the supplied num_lines only in case that
+ * the data destination module has requested suspension of the compressor,
+ * or if more than image_height scanlines are passed in.
+ *
+ * Note: we warn about excess calls to jpeg_write_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * excess scanlines passed in the last valid call are *silently* ignored,
+ * so that the application need not adjust num_lines for end-of-image
+ * when using a multiple-scanline buffer.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
+		      JDIMENSION num_lines)
+{
+  JDIMENSION row_ctr, rows_left;
+
+  if (cinfo->global_state != CSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height)
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_scanlines.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Ignore any extra scanlines at bottom of image. */
+  rows_left = cinfo->image_height - cinfo->next_scanline;
+  if (num_lines > rows_left)
+    num_lines = rows_left;
+
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
+  cinfo->next_scanline += row_ctr;
+  return row_ctr;
+}
+
+
+/*
+ * Alternate entry point to write raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
+		     JDIMENSION num_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->global_state != CSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_raw_data.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Verify that at least one iMCU row has been passed. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
+  if (num_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Directly compress the row. */
+  if (! (*cinfo->coef->compress_data) (cinfo, data)) {
+    /* If compressor did not consume the whole row, suspend processing. */
+    return 0;
+  }
+
+  /* OK, we processed one iMCU row. */
+  cinfo->next_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
diff --git a/cmake/externals/libjpeg/jcarith.c b/cmake/externals/libjpeg/jcarith.c
new file mode 100644
index 000000000..1b45089a3
--- /dev/null
+++ b/cmake/externals/libjpeg/jcarith.c
@@ -0,0 +1,945 @@
+/*
+ * jcarith.c
+ *
+ * Developed 1997-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy encoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy encoder object for arithmetic encoding. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
+  INT32 a;               /* A register, normalized size of coding interval */
+  INT32 sc;        /* counter for stacked 0xFF values which might overflow */
+  INT32 zc;          /* counter for pending 0x00 output values which might *
+                          * be discarded at the end ("Pacman" termination) */
+  int ct;  /* bit shift counter, determines when next byte will be written */
+  int buffer;                /* buffer for most recent output byte != 0xFF */
+
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char * dc_stats[NUM_ARITH_TBLS];
+  unsigned char * ac_stats[NUM_ARITH_TBLS];
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];
+} arith_entropy_encoder;
+
+typedef arith_entropy_encoder * arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS 64
+#define AC_STAT_BINS 256
+
+/* NOTE: Uncomment the following #define if you want to use the
+ * given formula for calculating the AC conditioning parameter Kx
+ * for spectral selection progressive coding in section G.1.3.2
+ * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
+ * Although the spec and P&M authors claim that this "has proven
+ * to give good results for 8 bit precision samples", I'm not
+ * convinced yet that this is really beneficial.
+ * Early tests gave only very marginal compression enhancements
+ * (a few - around 5 or so - bytes even for very large files),
+ * which would turn out rather negative if we'd suppress the
+ * DAC (Define Arithmetic Conditioning) marker segments for
+ * the default parameters in the future.
+ * Note that currently the marker writing module emits 12-byte
+ * DAC segments for a full-component scan in a color image.
+ * This is not worth worrying about IMHO. However, since the
+ * spec defines the default values to be used if the tables
+ * are omitted (unlike Huffman tables, which are required
+ * anyway), one might optimize this behaviour in the future,
+ * and then it would be disadvantageous to use custom tables if
+ * they don't provide sufficient gain to exceed the DAC size.
+ *
+ * On the other hand, I'd consider it as a reasonable result
+ * that the conditioning has no significant influence on the
+ * compression performance. This means that the basic
+ * statistical model is already rather stable.
+ *
+ * Thus, at the moment, we use the default conditioning values
+ * anyway, and do not use the custom formula.
+ *
+#define CALCULATE_SPECTRAL_CONDITIONING
+ */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	int ishift_temp;
+#define IRIGHT_SHIFT(x,shft)  \
+	((ishift_temp = (x)) < 0 ? \
+	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+	 (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+
+LOCAL(void)
+emit_byte (int val, j_compress_ptr cinfo)
+/* Write next output byte; we do not support suspension in this module. */
+{
+  struct jpeg_destination_mgr * dest = cinfo->dest;
+
+  *dest->next_output_byte++ = (JOCTET) val;
+  if (--dest->free_in_buffer == 0)
+    if (! (*dest->empty_output_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+}
+
+
+/*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass (j_compress_ptr cinfo)
+{
+  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  INT32 temp;
+
+  /* Section D.1.8: Termination of encoding */
+
+  /* Find the e->c in the coding interval with the largest
+   * number of trailing zero bits */
+  if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
+    e->c = temp + 0x8000L;
+  else
+    e->c = temp;
+  /* Send remaining bytes to output */
+  e->c <<= e->ct;
+  if (e->c & 0xF8000000L) {
+    /* One final overflow has to be handled */
+    if (e->buffer >= 0) {
+      if (e->zc)
+	do emit_byte(0x00, cinfo);
+	while (--e->zc);
+      emit_byte(e->buffer + 1, cinfo);
+      if (e->buffer + 1 == 0xFF)
+	emit_byte(0x00, cinfo);
+    }
+    e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+    e->sc = 0;
+  } else {
+    if (e->buffer == 0)
+      ++e->zc;
+    else if (e->buffer >= 0) {
+      if (e->zc)
+	do emit_byte(0x00, cinfo);
+	while (--e->zc);
+      emit_byte(e->buffer, cinfo);
+    }
+    if (e->sc) {
+      if (e->zc)
+	do emit_byte(0x00, cinfo);
+	while (--e->zc);
+      do {
+	emit_byte(0xFF, cinfo);
+	emit_byte(0x00, cinfo);
+      } while (--e->sc);
+    }
+  }
+  /* Output final bytes only if they are not 0x00 */
+  if (e->c & 0x7FFF800L) {
+    if (e->zc)  /* output final pending zero bytes */
+      do emit_byte(0x00, cinfo);
+      while (--e->zc);
+    emit_byte((int) ((e->c >> 19) & 0xFF), cinfo);
+    if (((e->c >> 19) & 0xFF) == 0xFF)
+      emit_byte(0x00, cinfo);
+    if (e->c & 0x7F800L) {
+      emit_byte((int) ((e->c >> 11) & 0xFF), cinfo);
+      if (((e->c >> 11) & 0xFF) == 0xFF)
+	emit_byte(0x00, cinfo);
+    }
+  }
+}
+
+
+/*
+ * The core arithmetic encoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
+ *
+ * Note: I've added full "Pacman" termination support to the
+ * byte output routines, which is equivalent to the optional
+ * Discard_final_zeros procedure (Figure D.15) in the spec.
+ * Thus, we always produce the shortest possible output
+ * stream compliant to the spec (no trailing zero bytes,
+ * except for FF stuffing).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(void)
+arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) 
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  register unsigned char nl, nm;
+  register INT32 qe, temp;
+  register int sv;
+
+  /* Fetch values from our compact representation of Table D.3(D.2):
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;
+  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
+
+  /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
+  e->a -= qe;
+  if (val != (sv >> 7)) {
+    /* Encode the less probable symbol */
+    if (e->a >= qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency, otherwise code the LPS
+       * as usual: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+  } else {
+    /* Encode the more probable symbol */
+    if (e->a >= 0x8000L)
+      return;  /* A >= 0x8000 -> ready, no renormalization required */
+    if (e->a < qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+  }
+
+  /* Renormalization & data output per section D.1.6 */
+  do {
+    e->a <<= 1;
+    e->c <<= 1;
+    if (--e->ct == 0) {
+      /* Another byte is ready for output */
+      temp = e->c >> 19;
+      if (temp > 0xFF) {
+	/* Handle overflow over all stacked 0xFF bytes */
+	if (e->buffer >= 0) {
+	  if (e->zc)
+	    do emit_byte(0x00, cinfo);
+	    while (--e->zc);
+	  emit_byte(e->buffer + 1, cinfo);
+	  if (e->buffer + 1 == 0xFF)
+	    emit_byte(0x00, cinfo);
+	}
+	e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+	e->sc = 0;
+	/* Note: The 3 spacer bits in the C register guarantee
+	 * that the new buffer byte can't be 0xFF here
+	 * (see page 160 in the P&M JPEG book). */
+	/* New output byte, might overflow later */
+	e->buffer = (int) (temp & 0xFF);
+      } else if (temp == 0xFF) {
+	++e->sc;  /* stack 0xFF byte (which might overflow later) */
+      } else {
+	/* Output all stacked 0xFF bytes, they will not overflow any more */
+	if (e->buffer == 0)
+	  ++e->zc;
+	else if (e->buffer >= 0) {
+	  if (e->zc)
+	    do emit_byte(0x00, cinfo);
+	    while (--e->zc);
+	  emit_byte(e->buffer, cinfo);
+	}
+	if (e->sc) {
+	  if (e->zc)
+	    do emit_byte(0x00, cinfo);
+	    while (--e->zc);
+	  do {
+	    emit_byte(0xFF, cinfo);
+	    emit_byte(0x00, cinfo);
+	  } while (--e->sc);
+	}
+	/* New output byte (can still overflow) */
+	e->buffer = (int) (temp & 0xFF);
+      }
+      e->c &= 0x7FFFFL;
+      e->ct += 8;
+    }
+  } while (e->a < 0x8000L);
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(void)
+emit_restart (j_compress_ptr cinfo, int restart_num)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci;
+  jpeg_component_info * compptr;
+
+  finish_pass(cinfo);
+
+  emit_byte(0xFF, cinfo);
+  emit_byte(JPEG_RST0 + restart_num, cinfo);
+
+  /* Re-initialize statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      /* Reset DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+    }
+  }
+
+  /* Reset arithmetic encoding variables */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  unsigned char *st;
+  int blkn, ci, tbl;
+  int v, v2, m;
+  ISHIFT_TEMPS
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    m = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = m - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;	/* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = m;
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
+	st += 2;			/* Table F.4: SP = S0 + 2 */
+	entropy->dc_context[ci] = 4;	/* small positive diff category */
+      } else {
+	v = -v;
+	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
+	st += 3;			/* Table F.4: SN = S0 + 3 */
+	entropy->dc_context[ci] = 8;	/* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+	arith_encode(cinfo, st, 1);
+	m = 1;
+	v2 = v;
+	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+	while (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st += 1;
+	}
+      }
+      arith_encode(cinfo, st, 0);
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;	/* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] += 8;	/* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke;
+  int v, v2, m;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  ke = cinfo->Se;
+  do {
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+  } while (--ke);
+
+  /* Figure F.5: Encode_AC_Coefficients */
+  for (k = cinfo->Ss - 1; k < ke;) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    arith_encode(cinfo, st, 0);		/* EOB decision */
+    for (;;) {
+      if ((v = (*block)[natural_order[++k]]) >= 0) {
+	if (v >>= cinfo->Al) {
+	  arith_encode(cinfo, st + 1, 1);
+	  arith_encode(cinfo, entropy->fixed_bin, 0);
+	  break;
+	}
+      } else {
+	v = -v;
+	if (v >>= cinfo->Al) {
+	  arith_encode(cinfo, st + 1, 1);
+	  arith_encode(cinfo, entropy->fixed_bin, 1);
+	  break;
+	}
+      }
+      arith_encode(cinfo, st + 1, 0);
+      st += 3;
+    }
+    st += 2;
+    /* Figure F.8: Encoding the magnitude category of v */
+    m = 0;
+    if (v -= 1) {
+      arith_encode(cinfo, st, 1);
+      m = 1;
+      v2 = v;
+      if (v2 >>= 1) {
+	arith_encode(cinfo, st, 1);
+	m <<= 1;
+	st = entropy->ac_stats[tbl] +
+	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	while (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st += 1;
+	}
+      }
+    }
+    arith_encode(cinfo, st, 0);
+    /* Figure F.9: Encoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      arith_encode(cinfo, st, (m & v) ? 1 : 0);
+  }
+  /* Encode EOB decision only if k < cinfo->Se */
+  if (k < cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  unsigned char *st;
+  int Al, blkn;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  st = entropy->fixed_bin;	/* use fixed probability estimation */
+  Al = cinfo->Al;
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke, kex;
+  int v;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Section G.1.3.3: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  ke = cinfo->Se;
+  do {
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+  } while (--ke);
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  for (kex = ke; kex > 0; kex--)
+    if ((v = (*block)[natural_order[kex]]) >= 0) {
+      if (v >>= cinfo->Ah) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Ah) break;
+    }
+
+  /* Figure G.10: Encode_AC_Coefficients_SA */
+  for (k = cinfo->Ss - 1; k < ke;) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (k >= kex)
+      arith_encode(cinfo, st, 0);	/* EOB decision */
+    for (;;) {
+      if ((v = (*block)[natural_order[++k]]) >= 0) {
+	if (v >>= cinfo->Al) {
+	  if (v >> 1)			/* previously nonzero coef */
+	    arith_encode(cinfo, st + 2, (v & 1));
+	  else {			/* newly nonzero coef */
+	    arith_encode(cinfo, st + 1, 1);
+	    arith_encode(cinfo, entropy->fixed_bin, 0);
+	  }
+	  break;
+	}
+      } else {
+	v = -v;
+	if (v >>= cinfo->Al) {
+	  if (v >> 1)			/* previously nonzero coef */
+	    arith_encode(cinfo, st + 2, (v & 1));
+	  else {			/* newly nonzero coef */
+	    arith_encode(cinfo, st + 1, 1);
+	    arith_encode(cinfo, entropy->fixed_bin, 1);
+	  }
+	  break;
+	}
+      }
+      arith_encode(cinfo, st + 1, 0);
+      st += 3;
+    }
+  }
+  /* Encode EOB decision only if k < cinfo->Se */
+  if (k < cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke;
+  int v, v2, m;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;	/* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = (*block)[0];
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
+	st += 2;			/* Table F.4: SP = S0 + 2 */
+	entropy->dc_context[ci] = 4;	/* small positive diff category */
+      } else {
+	v = -v;
+	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
+	st += 3;			/* Table F.4: SN = S0 + 3 */
+	entropy->dc_context[ci] = 8;	/* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+	arith_encode(cinfo, st, 1);
+	m = 1;
+	v2 = v;
+	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+	while (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st += 1;
+	}
+      }
+      arith_encode(cinfo, st, 0);
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;	/* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] += 8;	/* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+
+    /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+    if ((ke = cinfo->lim_Se) == 0) continue;
+    tbl = compptr->ac_tbl_no;
+
+    /* Establish EOB (end-of-block) index */
+    do {
+      if ((*block)[natural_order[ke]]) break;
+    } while (--ke);
+
+    /* Figure F.5: Encode_AC_Coefficients */
+    for (k = 0; k < ke;) {
+      st = entropy->ac_stats[tbl] + 3 * k;
+      arith_encode(cinfo, st, 0);	/* EOB decision */
+      while ((v = (*block)[natural_order[++k]]) == 0) {
+	arith_encode(cinfo, st + 1, 0);
+	st += 3;
+      }
+      arith_encode(cinfo, st + 1, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+	arith_encode(cinfo, entropy->fixed_bin, 0);
+      } else {
+	v = -v;
+	arith_encode(cinfo, entropy->fixed_bin, 1);
+      }
+      st += 2;
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+	arith_encode(cinfo, st, 1);
+	m = 1;
+	v2 = v;
+	if (v2 >>= 1) {
+	  arith_encode(cinfo, st, 1);
+	  m <<= 1;
+	  st = entropy->ac_stats[tbl] +
+	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	  while (v2 >>= 1) {
+	    arith_encode(cinfo, st, 1);
+	    m <<= 1;
+	    st += 1;
+	  }
+	}
+      }
+      arith_encode(cinfo, st, 0);
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+    /* Encode EOB decision only if k < cinfo->lim_Se */
+    if (k < cinfo->lim_Se) {
+      st = entropy->ac_stats[tbl] + 3 * k;
+      arith_encode(cinfo, st, 1);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass (j_compress_ptr cinfo, boolean gather_statistics)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (gather_statistics)
+    /* Make sure to avoid that in the master control logic!
+     * We are fully adaptive here and need no extra
+     * statistics gathering pass!
+     */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+
+  /* We assume jcmaster.c already validated the progressive scan parameters. */
+
+  /* Select execution routines */
+  if (cinfo->progressive_mode) {
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_first;
+      else
+	entropy->pub.encode_mcu = encode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_refine;
+      else
+	entropy->pub.encode_mcu = encode_mcu_AC_refine;
+    }
+  } else
+    entropy->pub.encode_mcu = encode_mcu;
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)
+	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)
+	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+#ifdef CALCULATE_SPECTRAL_CONDITIONING
+      if (cinfo->progressive_mode)
+	/* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
+	cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
+#endif
+    }
+  }
+
+  /* Initialize arithmetic encoding variables */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_arith_encoder (j_compress_ptr cinfo)
+{
+  arith_entropy_ptr entropy;
+  int i;
+
+  entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(arith_entropy_encoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass;
+  entropy->pub.finish_pass = finish_pass;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    entropy->dc_stats[i] = NULL;
+    entropy->ac_stats[i] = NULL;
+  }
+
+  /* Initialize index for fixed probability estimation */
+  entropy->fixed_bin[0] = 113;
+}
diff --git a/cmake/externals/libjpeg/jccoefct.c b/cmake/externals/libjpeg/jccoefct.c
new file mode 100644
index 000000000..494aa2298
--- /dev/null
+++ b/cmake/externals/libjpeg/jccoefct.c
@@ -0,0 +1,456 @@
+/*
+ * jccoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2003-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for compression.
+ * This controller is the top level of the JPEG compressor proper.
+ * The coefficient buffer lies between forward-DCT and entropy encoding steps.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* We use a full-image coefficient buffer when doing Huffman optimization,
+ * and also for writing multiple-scan JPEG files.  In all cases, the DCT
+ * step is run during the first pass, and subsequent passes need only read
+ * the buffered coefficients.
+ */
+#ifdef ENTROPY_OPT_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#else
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#endif
+#endif
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* For single-pass compression, it's sufficient to buffer just one MCU
+   * (although this may prove a bit slow in practice).
+   * We append a workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and reuse it for each MCU constructed and sent.
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays.
+   */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+
+  /* Workspace for single-pass compression (omitted otherwise). */
+  JBLOCK blk_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+/* Forward declarations */
+METHODDEF(boolean) compress_data
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+METHODDEF(boolean) compress_first_pass
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+METHODDEF(boolean) compress_output
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->MCU_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (coef->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_data;
+    break;
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_first_pass;
+    break;
+  case JBUF_CRANK_DEST:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_output;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+  }
+}
+
+
+/*
+ * Process some data in the single-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(boolean)
+compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int ci, xindex, yindex, yoffset, blockcnt;
+  JBLOCKROW blkp;
+  JSAMPARRAY input_ptr;
+  JDIMENSION xpos;
+  jpeg_component_info *compptr;
+  forward_DCT_ptr forward_DCT;
+
+  /* Loop to write as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Determine where data comes from in input_buf and do the DCT thing.
+       * Each call on forward_DCT processes a horizontal row of DCT blocks as
+       * wide as an MCU.  Dummy blocks at the right or bottom edge are filled in
+       * specially.  The data in them does not matter for image reconstruction,
+       * so we fill them with values that will encode to the smallest amount of
+       * data, viz: all zeroes in the AC entries, DC entries equal to previous
+       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
+       */
+      blkp = coef->blk_buffer;	/* pointer to current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	forward_DCT = cinfo->fdct->forward_DCT[compptr->component_index];
+	input_ptr = input_buf[compptr->component_index] +
+	  yoffset * compptr->DCT_v_scaled_size;
+	/* ypos == (yoffset + yindex) * compptr->DCT_v_scaled_size */
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	xpos = MCU_col_num * compptr->MCU_sample_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yoffset + yindex < compptr->last_row_height) {
+	    (*forward_DCT) (cinfo, compptr, input_ptr, blkp,
+			    xpos, (JDIMENSION) blockcnt);
+	    input_ptr += compptr->DCT_v_scaled_size;
+	    blkp += blockcnt;
+	    /* Dummy blocks at right edge */
+	    if ((xindex = compptr->MCU_width - blockcnt) == 0)
+	      continue;
+	  } else {
+	    /* At bottom of image, need a whole row of dummy blocks */
+	    xindex = compptr->MCU_width;
+	  }
+	  /* Fill in any dummy blocks needed in this row */
+	  MEMZERO(blkp, xindex * SIZEOF(JBLOCK));
+	  do {
+	    blkp[0][0] = blkp[-1][0];
+	    blkp++;
+	  } while (--xindex);
+	}
+      }
+      /* Try to write the MCU.  In event of a suspension failure, we will
+       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
+       */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+
+/*
+ * Process some data in the first pass of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * This amount of data is read from the source buffer, DCT'd and quantized,
+ * and saved into the virtual arrays.  We also generate suitable dummy blocks
+ * as needed at the right and lower edges.  (The dummy blocks are constructed
+ * in the virtual arrays, which have been padded appropriately.)  This makes
+ * it possible for subsequent passes not to worry about real vs. dummy blocks.
+ *
+ * We must also emit the data to the entropy encoder.  This is conveniently
+ * done by calling compress_output() after we've loaded the current strip
+ * of the virtual arrays.
+ *
+ * NB: input_buf contains a plane for each component in image.  All
+ * components are DCT'd and loaded into the virtual arrays in this pass.
+ * However, it may be that only a subset of the components are emitted to
+ * the entropy encoder during this first pass; be careful about looking
+ * at the scan-dependent variables (MCU dimensions, etc).
+ */
+
+METHODDEF(boolean)
+compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION blocks_across, MCUs_across, MCUindex;
+  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
+  JCOEF lastDC;
+  jpeg_component_info *compptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW thisblockrow, lastblockrow;
+  JSAMPARRAY input_ptr;
+  forward_DCT_ptr forward_DCT;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (coef->iMCU_row_num < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here, since may not be set! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    blocks_across = compptr->width_in_blocks;
+    h_samp_factor = compptr->h_samp_factor;
+    /* Count number of dummy blocks to be added at the right margin. */
+    ndummy = (int) (blocks_across % h_samp_factor);
+    if (ndummy > 0)
+      ndummy = h_samp_factor - ndummy;
+    forward_DCT = cinfo->fdct->forward_DCT[ci];
+    input_ptr = input_buf[ci];
+    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
+     * on forward_DCT processes a complete horizontal row of DCT blocks.
+     */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      thisblockrow = buffer[block_row];
+      (*forward_DCT) (cinfo, compptr, input_ptr, thisblockrow,
+		      (JDIMENSION) 0, blocks_across);
+      input_ptr += compptr->DCT_v_scaled_size;
+      if (ndummy > 0) {
+	/* Create dummy blocks at the right edge of the image. */
+	thisblockrow += blocks_across; /* => first dummy block */
+	FMEMZERO((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
+	lastDC = thisblockrow[-1][0];
+	for (bi = 0; bi < ndummy; bi++) {
+	  thisblockrow[bi][0] = lastDC;
+	}
+      }
+    }
+    /* If at end of image, create dummy block rows as needed.
+     * The tricky part here is that within each MCU, we want the DC values
+     * of the dummy blocks to match the last real block's DC value.
+     * This squeezes a few more bytes out of the resulting file...
+     */
+    if (block_row < compptr->v_samp_factor) {
+      blocks_across += ndummy;	/* include lower right corner */
+      MCUs_across = blocks_across / h_samp_factor;
+      do {
+	thisblockrow = buffer[block_row];
+	lastblockrow = buffer[block_row-1];
+	FMEMZERO((void FAR *) thisblockrow,
+		 (size_t) blocks_across * SIZEOF(JBLOCK));
+	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+	  lastDC = lastblockrow[h_samp_factor-1][0];
+	  for (bi = 0; bi < h_samp_factor; bi++) {
+	    thisblockrow[bi][0] = lastDC;
+	  }
+	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
+	  lastblockrow += h_samp_factor;
+	}
+      } while (++block_row < compptr->v_samp_factor);
+    }
+  }
+  /* NB: compress_output will increment iMCU_row_num if successful.
+   * A suspension return will result in redoing all the work above next time.
+   */
+
+  /* Emit data to the entropy encoder, sharing code with subsequent passes */
+  return compress_output(cinfo, input_buf);
+}
+
+
+/*
+ * Process some data in subsequent passes of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY blkp;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan.
+   * NB: during first pass, this is safe only because the buffers will
+   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkp = coef->MCU_buffer;	/* pointer to current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
+	  xindex = compptr->MCU_width;
+	  do {
+	    *blkp++ = buffer_ptr++;
+	  } while (--xindex);
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+#endif /* FULL_COEF_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  if (need_full_buffer) {
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    int ci;
+    jpeg_component_info *compptr;
+
+    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       SIZEOF(my_coef_controller) - SIZEOF(coef->blk_buffer));
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) compptr->v_samp_factor);
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKARRAY blkp;
+    JBLOCKROW buffer_ptr;
+    int bi;
+
+    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
+    blkp = coef->MCU_buffer;
+    buffer_ptr = coef->blk_buffer;
+    bi = C_MAX_BLOCKS_IN_MCU;
+    do {
+      *blkp++ = buffer_ptr++;
+    } while (--bi);
+    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
+  }
+
+  coef->pub.start_pass = start_pass_coef;
+  cinfo->coef = &coef->pub;
+}
diff --git a/cmake/externals/libjpeg/jccolor.c b/cmake/externals/libjpeg/jccolor.c
new file mode 100644
index 000000000..c028dd9db
--- /dev/null
+++ b/cmake/externals/libjpeg/jccolor.c
@@ -0,0 +1,598 @@
+/*
+ * jccolor.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011-2023 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_converter pub; /* public fields */
+
+  /* Private state for RGB->YCC conversion */
+  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
+} my_color_converter;
+
+typedef my_color_converter * my_cconvert_ptr;
+
+
+/**************** RGB -> YCbCr conversion: most common case **************/
+
+/*
+ * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
+ * previously known as Recommendation CCIR 601-1, except that Cb and Cr
+ * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
+ * sYCC (standard luma-chroma-chroma color space with extended gamut)
+ * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
+ * bg-sRGB and bg-sYCC (big gamut standard color spaces)
+ * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
+ * Note that the derived conversion coefficients given in some of these
+ * documents are imprecise.  The general conversion equations are
+ *	Y  = Kr * R + (1 - Kr - Kb) * G + Kb * B
+ *	Cb = (B - Y) / (1 - Kb) / K
+ *	Cr = (R - Y) / (1 - Kr) / K
+ * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
+ * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
+ * the conversion equations to be implemented are therefore
+ *	Y  =  0.299 * R + 0.587 * G + 0.114 * B
+ *	Cb = -0.168735892 * R - 0.331264108 * G + 0.5 * B + CENTERJSAMPLE
+ *	Cr =  0.5 * R - 0.418687589 * G - 0.081312411 * B + CENTERJSAMPLE
+ * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
+ * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
+ * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
+ * were not represented exactly.  Now we sacrifice exact representation of
+ * maximum red and maximum blue in order to get exact grayscales.
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times R,G,B for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 9-bit to 12-bit samples it is still acceptable.  It's not very
+ * reasonable for 16-bit samples, but if you want lossless storage
+ * you shouldn't be changing colorspace anyway.
+ * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
+ * in the tables to save adding them separately in the inner loop.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+/* We allocate one big table and divide it up into eight parts, instead of
+ * doing eight alloc_small requests.  This lets us use a single table base
+ * address, which can be held in a register in the inner loops on many
+ * machines (more than can hold all eight addresses, anyway).
+ */
+
+#define R_Y_OFF		0			/* offset to R => Y section */
+#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
+#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
+#define R_CB_OFF	(3*(MAXJSAMPLE+1))
+#define G_CB_OFF	(4*(MAXJSAMPLE+1))
+#define B_CB_OFF	(5*(MAXJSAMPLE+1))
+#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF	(6*(MAXJSAMPLE+1))
+#define B_CR_OFF	(7*(MAXJSAMPLE+1))
+#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
+
+
+/*
+ * Initialize for RGB->YCC colorspace conversion.
+ */
+
+METHODDEF(void)
+rgb_ycc_start (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * rgb_ycc_tab;
+  INT32 i;
+
+  /* Allocate and fill in the conversion tables. */
+  cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				TABLE_SIZE * SIZEOF(INT32));
+
+  for (i = 0; i <= MAXJSAMPLE; i++) {
+    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.299) * i;
+    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.587) * i;
+    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.114) * i + ONE_HALF;
+    rgb_ycc_tab[i+R_CB_OFF] = (- FIX(0.168735892)) * i;
+    rgb_ycc_tab[i+G_CB_OFF] = (- FIX(0.331264108)) * i;
+    /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
+     * This ensures that the maximum output will round to MAXJSAMPLE
+     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
+     */
+    rgb_ycc_tab[i+B_CB_OFF] = (i << (SCALEBITS-1)) + CBCR_OFFSET + ONE_HALF-1;
+/*  B=>Cb and R=>Cr tables are the same
+    rgb_ycc_tab[i+R_CR_OFF] = (i << (SCALEBITS-1)) + CBCR_OFFSET + ONE_HALF-1;
+*/
+    rgb_ycc_tab[i+G_CR_OFF] = (- FIX(0.418687589)) * i;
+    rgb_ycc_tab[i+B_CR_OFF] = (- FIX(0.081312411)) * i;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ *
+ * Note that we change from the application's interleaved-pixel format
+ * to our internal noninterleaved, one-plane-per-component format.  The
+ * input buffer is therefore three times as wide as the output buffer.
+ *
+ * A starting row offset is provided only for the output buffer.  The
+ * caller can easily adjust the passed input_buf value to accommodate
+ * any row offset required on that side.
+ */
+
+METHODDEF(void)
+rgb_ycc_convert (j_compress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		 JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (JSAMPLE)
+		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+		 >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (JSAMPLE)
+		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles RGB->grayscale conversion,
+ * which is the same as the RGB->Y portion of RGB->YCbCr.
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		  JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 y;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row++];
+    for (col = 0; col < num_cols; col++) {
+      y  = ctab[R_Y_OFF + GETJSAMPLE(inptr[RGB_RED])];
+      y += ctab[G_Y_OFF + GETJSAMPLE(inptr[RGB_GREEN])];
+      y += ctab[B_Y_OFF + GETJSAMPLE(inptr[RGB_BLUE])];
+      inptr += RGB_PIXELSIZE;
+      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles Adobe-style CMYK->YCCK conversion,
+ * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the
+ * same conversion as above, while passing K (black) unchanged.
+ * We assume rgb_ycc_start has been called.
+ */
+
+METHODDEF(void)
+cmyk_ycck_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2, outptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    outptr3 = output_buf[3][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
+      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
+      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
+      /* K passes through as-is */
+      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
+      inptr += 4;
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (JSAMPLE)
+		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+		 >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (JSAMPLE)
+		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * [R,G,B] to [R-G,G,B-G] conversion with modulo calculation
+ * (forward reversible color transform).
+ * This can be seen as an adaption of the general RGB->YCbCr
+ * conversion equation with Kr = Kb = 0, while replacing the
+ * normalization by modulo calculation.
+ */
+
+METHODDEF(void)
+rgb_rgb1_convert (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		  JDIMENSION output_row, int num_rows)
+{
+  register int r, g, b;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      outptr0[col] = (JSAMPLE) ((r - g + CENTERJSAMPLE) & MAXJSAMPLE);
+      outptr1[col] = (JSAMPLE) g;
+      outptr2[col] = (JSAMPLE) ((b - g + CENTERJSAMPLE) & MAXJSAMPLE);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles grayscale output with no conversion.
+ * The source can be either plain grayscale or YCC (since Y == gray).
+ */
+
+METHODDEF(void)
+grayscale_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION count;
+  register int instride = cinfo->input_components;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row++];
+    for (count = num_cols; count > 0; count--) {
+      *outptr++ = *inptr;	/* don't need GETJSAMPLE() here */
+      inptr += instride;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * No colorspace conversion, but change from interleaved
+ * to separate-planes representation.
+ */
+
+METHODDEF(void)
+rgb_convert (j_compress_ptr cinfo,
+	     JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+	     JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr0[col] = inptr[RGB_RED];
+      outptr1[col] = inptr[RGB_GREEN];
+      outptr2[col] = inptr[RGB_BLUE];
+      inptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles multi-component colorspaces without conversion.
+ * We assume input_components == num_components.
+ */
+
+METHODDEF(void)
+null_convert (j_compress_ptr cinfo,
+	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+	      JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION count;
+  register int num_comps = cinfo->num_components;
+  JDIMENSION num_cols = cinfo->image_width;
+  int ci;
+
+  while (--num_rows >= 0) {
+    /* It seems fastest to make a separate pass for each component. */
+    for (ci = 0; ci < num_comps; ci++) {
+      inptr = input_buf[0] + ci;
+      outptr = output_buf[ci][output_row];
+      for (count = num_cols; count > 0; count--) {
+	*outptr++ = *inptr;	/* don't need GETJSAMPLE() here */
+	inptr += num_comps;
+      }
+    }
+    input_buf++;
+    output_row++;
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+null_method (j_compress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for input colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_converter (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+
+  cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_color_converter));
+  cinfo->cconvert = &cconvert->pub;
+  /* set start_pass to null method until we find out differently */
+  cconvert->pub.start_pass = null_method;
+
+  /* Make sure input_components agrees with in_color_space */
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->input_components != 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_BG_RGB:
+#if RGB_PIXELSIZE != 3
+    if (cinfo->input_components != RGB_PIXELSIZE)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+#endif /* else share code with YCbCr */
+
+  case JCS_YCbCr:
+  case JCS_BG_YCC:
+    if (cinfo->input_components != 3)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->input_components != 4)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->input_components < 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+  }
+
+  /* Support color transform only for RGB colorspaces */
+  if (cinfo->color_transform &&
+      cinfo->jpeg_color_space != JCS_RGB &&
+      cinfo->jpeg_color_space != JCS_BG_RGB)
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+
+  /* Check num_components, set conversion method based on requested space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    switch (cinfo->in_color_space) {
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_BG_YCC:
+      cconvert->pub.color_convert = grayscale_convert;
+      break;
+    case JCS_RGB:
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_gray_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_RGB:
+  case JCS_BG_RGB:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    switch (cinfo->color_transform) {
+    case JCT_NONE:
+      cconvert->pub.color_convert = rgb_convert;
+      break;
+    case JCT_SUBTRACT_GREEN:
+      cconvert->pub.color_convert = rgb_rgb1_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    switch (cinfo->in_color_space) {
+    case JCS_RGB:
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_ycc_convert;
+      break;
+    case JCS_YCbCr:
+      cconvert->pub.color_convert = null_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_BG_YCC:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    switch (cinfo->in_color_space) {
+    case JCS_RGB:
+      /* For conversion from normal RGB input to BG_YCC representation,
+       * the Cb/Cr values are first computed as usual, and then
+       * quantized further after DCT processing by a factor of
+       * 2 in reference to the nominal quantization factor.
+       */
+      /* need quantization scale by factor of 2 after DCT */
+      cinfo->comp_info[1].component_needed = TRUE;
+      cinfo->comp_info[2].component_needed = TRUE;
+      /* compute normal YCC first */
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_ycc_convert;
+      break;
+    case JCS_YCbCr:
+      /* need quantization scale by factor of 2 after DCT */
+      cinfo->comp_info[1].component_needed = TRUE;
+      cinfo->comp_info[2].component_needed = TRUE;
+      /*FALLTHROUGH*/
+    case JCS_BG_YCC:
+      /* Pass through for BG_YCC input */
+      cconvert->pub.color_convert = null_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_CMYK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space != JCS_CMYK)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    cconvert->pub.color_convert = null_convert;
+    break;
+
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    switch (cinfo->in_color_space) {
+    case JCS_CMYK:
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = cmyk_ycck_convert;
+      break;
+    case JCS_YCCK:
+      cconvert->pub.color_convert = null_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  default:			/* allow null conversion of JCS_UNKNOWN */
+    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
+	cinfo->num_components != cinfo->input_components)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    cconvert->pub.color_convert = null_convert;
+  }
+}
diff --git a/cmake/externals/libjpeg/jcdctmgr.c b/cmake/externals/libjpeg/jcdctmgr.c
new file mode 100644
index 000000000..a48ccd814
--- /dev/null
+++ b/cmake/externals/libjpeg/jcdctmgr.c
@@ -0,0 +1,466 @@
+/*
+ * jcdctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the forward-DCT management logic.
+ * This code selects a particular DCT implementation to be used,
+ * and it performs related housekeeping chores including coefficient
+ * quantization.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_forward_dct pub;	/* public fields */
+
+  /* Pointer to the DCT routine actually in use */
+  forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
+
+#ifdef DCT_FLOAT_SUPPORTED
+  /* Same as above for the floating-point case. */
+  float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
+#endif
+} my_fdct_controller;
+
+typedef my_fdct_controller * my_fdct_ptr;
+
+
+/* The allocated post-DCT divisor tables -- big enough for any
+ * supported variant and not identical to the quant table entries,
+ * because of scaling (especially for an unnormalized DCT) --
+ * are pointed to by dct_table in the per-component comp_info
+ * structures.  Each table is given in normal array order.
+ */
+
+typedef union {
+  DCTELEM int_array[DCTSIZE2];
+#ifdef DCT_FLOAT_SUPPORTED
+  FAST_FLOAT float_array[DCTSIZE2];
+#endif
+} divisor_table;
+
+
+/* The current scaled-DCT routines require ISLOW-style divisor tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#ifdef DCT_ISLOW_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#else
+#ifdef DCT_SCALING_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#endif
+#endif
+
+
+/*
+ * Perform forward DCT on one or more blocks of a component.
+ *
+ * The input samples are taken from the sample_data[] array starting at
+ * position start_col, and moving to the right for any additional blocks.
+ * The quantized coefficients are returned in coef_blocks[].
+ */
+
+METHODDEF(void)
+forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
+	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+	     JDIMENSION start_col, JDIMENSION num_blocks)
+/* This version is used for integer DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
+  DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
+  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
+    /* Perform the DCT */
+    (*do_dct) (workspace, sample_data, start_col);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register DCTELEM temp, qval;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	qval = divisors[i];
+	temp = workspace[i];
+	/* Divide the coefficient value by qval, ensuring proper rounding.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 *
+	 * In most files, at least half of the output values will be zero
+	 * (at default quantization settings, more like three-quarters...)
+	 * so we should ensure that this case is fast.  On many machines,
+	 * a comparison is enough cheaper than a divide to make a special test
+	 * a win.  Since both inputs will be nonnegative, we need only test
+	 * for a < b to discover whether a/b is 0.
+	 * If your machine's division is fast enough, define FAST_DIVIDE.
+	 */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a,b)	a /= b
+#else
+#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
+#endif
+	if (temp < 0) {
+	  temp = -temp;
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	  temp = -temp;
+	} else {
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	}
+	output_ptr[i] = (JCOEF) temp;
+      }
+    }
+  }
+}
+
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+METHODDEF(void)
+forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		   JDIMENSION start_col, JDIMENSION num_blocks)
+/* This version is used for floating-point DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
+  FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
+  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
+    /* Perform the DCT */
+    (*do_dct) (workspace, sample_data, start_col);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register FAST_FLOAT temp;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	/* Apply the quantization and scaling factor */
+	temp = workspace[i] * divisors[i];
+	/* Round to nearest integer.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 * The maximum coefficient size is +-16K (for 12-bit data), so this
+	 * code should work for either 16-bit or 32-bit ints.
+	 */
+	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
+      }
+    }
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
+
+
+/*
+ * Initialize for a processing pass.
+ * Verify that all referenced Q-tables are present, and set up
+ * the divisor table for each one.
+ * In the current implementation, DCT of all components is done during
+ * the first pass, even if only some components will be output in the
+ * first scan.  Hence all components should be examined here.
+ */
+
+METHODDEF(void)
+start_pass_fdctmgr (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  int ci, qtblno, i;
+  jpeg_component_info *compptr;
+  int method = 0;
+  JQUANT_TBL * qtbl;
+  DCTELEM * dtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper DCT routine for this component's scaling */
+    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
+#ifdef DCT_SCALING_SUPPORTED
+    case ((1 << 8) + 1):
+      fdct->do_dct[ci] = jpeg_fdct_1x1;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((2 << 8) + 2):
+      fdct->do_dct[ci] = jpeg_fdct_2x2;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((3 << 8) + 3):
+      fdct->do_dct[ci] = jpeg_fdct_3x3;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((4 << 8) + 4):
+      fdct->do_dct[ci] = jpeg_fdct_4x4;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((5 << 8) + 5):
+      fdct->do_dct[ci] = jpeg_fdct_5x5;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((6 << 8) + 6):
+      fdct->do_dct[ci] = jpeg_fdct_6x6;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((7 << 8) + 7):
+      fdct->do_dct[ci] = jpeg_fdct_7x7;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((9 << 8) + 9):
+      fdct->do_dct[ci] = jpeg_fdct_9x9;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((10 << 8) + 10):
+      fdct->do_dct[ci] = jpeg_fdct_10x10;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((11 << 8) + 11):
+      fdct->do_dct[ci] = jpeg_fdct_11x11;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((12 << 8) + 12):
+      fdct->do_dct[ci] = jpeg_fdct_12x12;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((13 << 8) + 13):
+      fdct->do_dct[ci] = jpeg_fdct_13x13;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((14 << 8) + 14):
+      fdct->do_dct[ci] = jpeg_fdct_14x14;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((15 << 8) + 15):
+      fdct->do_dct[ci] = jpeg_fdct_15x15;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((16 << 8) + 16):
+      fdct->do_dct[ci] = jpeg_fdct_16x16;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((16 << 8) + 8):
+      fdct->do_dct[ci] = jpeg_fdct_16x8;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((14 << 8) + 7):
+      fdct->do_dct[ci] = jpeg_fdct_14x7;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((12 << 8) + 6):
+      fdct->do_dct[ci] = jpeg_fdct_12x6;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((10 << 8) + 5):
+      fdct->do_dct[ci] = jpeg_fdct_10x5;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((8 << 8) + 4):
+      fdct->do_dct[ci] = jpeg_fdct_8x4;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((6 << 8) + 3):
+      fdct->do_dct[ci] = jpeg_fdct_6x3;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((4 << 8) + 2):
+      fdct->do_dct[ci] = jpeg_fdct_4x2;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((2 << 8) + 1):
+      fdct->do_dct[ci] = jpeg_fdct_2x1;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((8 << 8) + 16):
+      fdct->do_dct[ci] = jpeg_fdct_8x16;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((7 << 8) + 14):
+      fdct->do_dct[ci] = jpeg_fdct_7x14;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((6 << 8) + 12):
+      fdct->do_dct[ci] = jpeg_fdct_6x12;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((5 << 8) + 10):
+      fdct->do_dct[ci] = jpeg_fdct_5x10;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((4 << 8) + 8):
+      fdct->do_dct[ci] = jpeg_fdct_4x8;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((3 << 8) + 6):
+      fdct->do_dct[ci] = jpeg_fdct_3x6;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((2 << 8) + 4):
+      fdct->do_dct[ci] = jpeg_fdct_2x4;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+    case ((1 << 8) + 2):
+      fdct->do_dct[ci] = jpeg_fdct_1x2;
+      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
+      break;
+#endif
+    case ((DCTSIZE << 8) + DCTSIZE):
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	fdct->do_dct[ci] = jpeg_fdct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+	fdct->do_dct[ci] = jpeg_fdct_ifast;
+	method = JDCT_IFAST;
+	break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	fdct->do_float_dct[ci] = jpeg_fdct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:
+	ERREXIT(cinfo, JERR_NOT_COMPILED);
+      }
+      break;
+    default:
+      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
+    }
+    qtblno = compptr->quant_tbl_no;
+    /* Make sure specified quantization table is present */
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    qtbl = cinfo->quant_tbl_ptrs[qtblno];
+    /* Create divisor table from quant table */
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      /* For LL&M IDCT method, divisors are equal to raw quantization
+       * coefficients multiplied by 8 (to counteract scaling).
+       */
+      dtbl = (DCTELEM *) compptr->dct_table;
+      for (i = 0; i < DCTSIZE2; i++) {
+	dtbl[i] =
+	  ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
+      }
+      fdct->pub.forward_DCT[ci] = forward_DCT;
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 */
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	dtbl = (DCTELEM *) compptr->dct_table;
+	for (i = 0; i < DCTSIZE2; i++) {
+	  dtbl[i] = (DCTELEM)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
+	}
+      }
+      fdct->pub.forward_DCT[ci] = forward_DCT;
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 * What's actually stored is 1/divisor so that the inner loop can
+	 * use a multiplication rather than a division.
+	 */
+	FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fdtbl[i] = (FAST_FLOAT)
+	      (1.0 / ((double) qtbl->quantval[i] *
+		      aanscalefactor[row] * aanscalefactor[col] *
+		      (compptr->component_needed ? 16.0 : 8.0)));
+	    i++;
+	  }
+	}
+      }
+      fdct->pub.forward_DCT[ci] = forward_DCT_float;
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+    }
+  }
+}
+
+
+/*
+ * Initialize FDCT manager.
+ */
+
+GLOBAL(void)
+jinit_forward_dct (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct;
+  int ci;
+  jpeg_component_info *compptr;
+
+  fdct = (my_fdct_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_fdct_controller));
+  cinfo->fdct = &fdct->pub;
+  fdct->pub.start_pass = start_pass_fdctmgr;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate a divisor table for each component */
+    compptr->dct_table = (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(divisor_table));
+  }
+}
diff --git a/cmake/externals/libjpeg/jchuff.c b/cmake/externals/libjpeg/jchuff.c
new file mode 100644
index 000000000..1f527b218
--- /dev/null
+++ b/cmake/externals/libjpeg/jchuff.c
@@ -0,0 +1,1656 @@
+/*
+ * jchuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2006-2023 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy encoding routines.
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Much of the complexity here has to do with supporting output suspension.
+ * If the data destination module demands suspension, we want to be able to
+ * back up to the start of the current MCU.  To do this, we copy state
+ * variables into local working storage, and update them back to the
+ * permanent JPEG objects only upon successful completion of an MCU.
+ *
+ * We do not support output suspension for the progressive JPEG mode, since
+ * the library currently does not allow multiple-scan files to be written
+ * with output suspension.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* The legal range of a DCT coefficient is
+ *  -1024 .. +1023  for 8-bit sample data precision;
+ * -16384 .. +16383 for 12-bit sample data precision.
+ * Hence the magnitude should always fit in sample data precision + 2 bits.
+ */
+
+/* Derived data constructed for each Huffman table */
+
+typedef struct {
+  unsigned int ehufco[256];	/* code for each symbol */
+  char ehufsi[256];		/* length of code for each symbol */
+  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
+} c_derived_tbl;
+
+
+/* Expanded entropy encoder object for Huffman encoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  INT32 put_buffer;		/* current bit-accumulation buffer */
+  int put_bits;			/* # of bits now in it */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).put_buffer = (src).put_buffer, \
+	 (dest).put_bits = (src).put_bits, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  savable_state saved;		/* Bit buffer & DC state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Statistics tables for optimization */
+  long * dc_count_ptrs[NUM_HUFF_TBLS];
+  long * ac_count_ptrs[NUM_HUFF_TBLS];
+
+  /* Following fields used only in progressive mode */
+
+  /* Mode flag: TRUE for optimization, FALSE for actual data output */
+  boolean gather_statistics;
+
+  /* next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
+   */
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
+
+  /* Coding status for AC components */
+  int ac_tbl_no;		/* the table number of the single component */
+  unsigned int EOBRUN;		/* run length of EOBs */
+  unsigned int BE;		/* # of buffered correction bits before MCU */
+  char * bit_buffer;		/* buffer for correction bits (1 per char) */
+  /* packing correction bits tightly would save some space but cost time... */
+} huff_entropy_encoder;
+
+typedef huff_entropy_encoder * huff_entropy_ptr;
+
+/* Working state while writing an MCU (sequential mode).
+ * This struct contains all the fields that are needed by subroutines.
+ */
+
+typedef struct {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  savable_state cur;		/* Current bit buffer & DC state */
+  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
+} working_state;
+
+/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
+ * buffer can hold.  Larger sizes may slightly improve compression, but
+ * 1000 is already well into the realm of overkill.
+ * The minimum safe size is 64 bits.
+ */
+
+#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	int ishift_temp;
+#define IRIGHT_SHIFT(x,shft)  \
+	((ishift_temp = (x)) < 0 ? \
+	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+	 (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ */
+
+LOCAL(void)
+jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
+			 c_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  c_derived_tbl *dtbl;
+  int p, i, l, lastp, si, maxsymbol;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    htbl = jpeg_std_huff_table((j_common_ptr) cinfo, isDC, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (c_derived_tbl *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(c_derived_tbl));
+  dtbl = *pdtbl;
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;
+  lastp = p;
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+  
+  /* Figure C.3: generate encoding tables */
+  /* These are code and size indexed by symbol value */
+
+  /* Set all codeless symbols to have code length 0;
+   * this lets us detect duplicate VAL entries here, and later
+   * allows emit_bits to detect any attempt to emit such symbols.
+   */
+  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
+
+  /* This is also a convenient place to check for out-of-range
+   * and duplicated VAL entries.  We allow 0..255 for AC symbols
+   * but only 0..15 for DC.  (We could constrain them further
+   * based on data depth and mode, but this seems enough.)
+   */
+  maxsymbol = isDC ? 15 : 255;
+
+  for (p = 0; p < lastp; p++) {
+    i = htbl->huffval[p];
+    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    dtbl->ehufco[i] = huffcode[p];
+    dtbl->ehufsi[i] = huffsize[p];
+  }
+}
+
+
+/* Outputting bytes to the file.
+ * NB: these must be called only when actually outputting,
+ * that is, entropy->gather_statistics == FALSE.
+ */
+
+/* Emit a byte, taking 'action' if must suspend. */
+#define emit_byte_s(state,val,action)  \
+	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(state)->free_in_buffer == 0)  \
+	    if (! dump_buffer_s(state))  \
+	      { action; } }
+
+/* Emit a byte */
+#define emit_byte_e(entropy,val)  \
+	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(entropy)->free_in_buffer == 0)  \
+	    dump_buffer_e(entropy); }
+
+
+LOCAL(boolean)
+dump_buffer_s (working_state * state)
+/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
+{
+  struct jpeg_destination_mgr * dest = state->cinfo->dest;
+
+  if (! (*dest->empty_output_buffer) (state->cinfo))
+    return FALSE;
+  /* After a successful buffer dump, must reset buffer pointers */
+  state->next_output_byte = dest->next_output_byte;
+  state->free_in_buffer = dest->free_in_buffer;
+  return TRUE;
+}
+
+
+LOCAL(void)
+dump_buffer_e (huff_entropy_ptr entropy)
+/* Empty the output buffer; we do not support suspension in this case. */
+{
+  struct jpeg_destination_mgr * dest = entropy->cinfo->dest;
+
+  if (! (*dest->empty_output_buffer) (entropy->cinfo))
+    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
+  /* After a successful buffer dump, must reset buffer pointers */
+  entropy->next_output_byte = dest->next_output_byte;
+  entropy->free_in_buffer = dest->free_in_buffer;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+INLINE
+LOCAL(boolean)
+emit_bits_s (working_state * state, unsigned int code, int size)
+/* Emit some bits; return TRUE if successful, FALSE if must suspend */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer;
+  register int put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
+
+  /* mask off any extra bits in code */
+  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
+
+  /* new number of bits in buffer */
+  put_bits = size + state->cur.put_bits;
+
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  /* and merge with old buffer contents */
+  put_buffer |= state->cur.put_buffer;
+
+  while (put_bits >= 8) {
+    int c = (int) ((put_buffer >> 16) & 0xFF);
+
+    emit_byte_s(state, c, return FALSE);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte_s(state, 0, return FALSE);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  state->cur.put_buffer = put_buffer; /* update state variables */
+  state->cur.put_bits = put_bits;
+
+  return TRUE;
+}
+
+
+INLINE
+LOCAL(void)
+emit_bits_e (huff_entropy_ptr entropy, unsigned int code, int size)
+/* Emit some bits, unless we are in gather mode */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer;
+  register int put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+  if (entropy->gather_statistics)
+    return;			/* do nothing if we're only getting stats */
+
+  /* mask off any extra bits in code */
+  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
+
+  /* new number of bits in buffer */
+  put_bits = size + entropy->saved.put_bits;
+
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  /* and merge with old buffer contents */
+  put_buffer |= entropy->saved.put_buffer;
+
+  while (put_bits >= 8) {
+    int c = (int) ((put_buffer >> 16) & 0xFF);
+
+    emit_byte_e(entropy, c);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte_e(entropy, 0);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  entropy->saved.put_buffer = put_buffer; /* update variables */
+  entropy->saved.put_bits = put_bits;
+}
+
+
+LOCAL(boolean)
+flush_bits_s (working_state * state)
+{
+  if (! emit_bits_s(state, 0x7F, 7)) /* fill any partial byte with ones */
+    return FALSE;
+  state->cur.put_buffer = 0;	     /* and reset bit-buffer to empty */
+  state->cur.put_bits = 0;
+  return TRUE;
+}
+
+
+LOCAL(void)
+flush_bits_e (huff_entropy_ptr entropy)
+{
+  emit_bits_e(entropy, 0x7F, 7); /* fill any partial byte with ones */
+  entropy->saved.put_buffer = 0; /* and reset bit-buffer to empty */
+  entropy->saved.put_bits = 0;
+}
+
+
+/*
+ * Emit (or just count) a Huffman symbol.
+ */
+
+INLINE
+LOCAL(void)
+emit_dc_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  if (entropy->gather_statistics)
+    entropy->dc_count_ptrs[tbl_no][symbol]++;
+  else {
+    c_derived_tbl * tbl = entropy->dc_derived_tbls[tbl_no];
+    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
+  }
+}
+
+
+INLINE
+LOCAL(void)
+emit_ac_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  if (entropy->gather_statistics)
+    entropy->ac_count_ptrs[tbl_no][symbol]++;
+  else {
+    c_derived_tbl * tbl = entropy->ac_derived_tbls[tbl_no];
+    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
+  }
+}
+
+
+/*
+ * Emit bits from a correction bit buffer.
+ */
+
+LOCAL(void)
+emit_buffered_bits (huff_entropy_ptr entropy, char * bufstart,
+		    unsigned int nbits)
+{
+  if (entropy->gather_statistics)
+    return;			/* no real work */
+
+  while (nbits > 0) {
+    emit_bits_e(entropy, (unsigned int) (*bufstart), 1);
+    bufstart++;
+    nbits--;
+  }
+}
+
+
+/*
+ * Emit any pending EOBRUN symbol.
+ */
+
+LOCAL(void)
+emit_eobrun (huff_entropy_ptr entropy)
+{
+  register int temp, nbits;
+
+  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
+    temp = entropy->EOBRUN;
+    nbits = 0;
+    while ((temp >>= 1))
+      nbits++;
+    /* safety check: shouldn't happen given limited correction-bit buffer */
+    if (nbits > 14)
+      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+    emit_ac_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
+    if (nbits)
+      emit_bits_e(entropy, entropy->EOBRUN, nbits);
+
+    entropy->EOBRUN = 0;
+
+    /* Emit any buffered correction bits */
+    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(boolean)
+emit_restart_s (working_state * state, int restart_num)
+{
+  int ci;
+
+  if (! flush_bits_s(state))
+    return FALSE;
+
+  emit_byte_s(state, 0xFF, return FALSE);
+  emit_byte_s(state, JPEG_RST0 + restart_num, return FALSE);
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
+    state->cur.last_dc_val[ci] = 0;
+
+  /* The restart counter is not updated until we successfully write the MCU. */
+
+  return TRUE;
+}
+
+
+LOCAL(void)
+emit_restart_e (huff_entropy_ptr entropy, int restart_num)
+{
+  int ci;
+
+  emit_eobrun(entropy);
+
+  if (! entropy->gather_statistics) {
+    flush_bits_e(entropy);
+    emit_byte_e(entropy, 0xFF);
+    emit_byte_e(entropy, JPEG_RST0 + restart_num);
+  }
+
+  if (entropy->cinfo->Ss == 0) {
+    /* Re-initialize DC predictions to 0 */
+    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
+      entropy->saved.last_dc_val[ci] = 0;
+  } else {
+    /* Re-initialize all AC-related fields to 0 */
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  int max_coef_bits;
+  int blkn, ci, tbl;
+  ISHIFT_TEMPS
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  /* Since we're encoding a difference, the range limit is twice as much. */
+  max_coef_bits = cinfo->data_precision + 3;
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    temp = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
+
+    /* DC differences are figured on the point-transformed values. */
+    if ((temp2 = temp - entropy->saved.last_dc_val[ci]) == 0) {
+      /* Count/emit the Huffman-coded symbol for the number of bits */
+      emit_dc_symbol(entropy, tbl, 0);
+
+      continue;
+    }
+
+    entropy->saved.last_dc_val[ci] = temp;
+
+    /* Encode the DC coefficient difference per section G.1.2.1 */
+    if ((temp = temp2) < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* For a negative input, want temp2 = bitwise complement of abs(input) */
+      /* This code assumes we are on a two's complement machine */
+      temp2--;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    do nbits++;			/* there must be at least one 1 bit */
+    while ((temp >>= 1));
+    /* Check for out-of-range coefficient values */
+    if (nbits > max_coef_bits)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count/emit the Huffman-coded symbol for the number of bits */
+    emit_dc_symbol(entropy, tbl, nbits);
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    emit_bits_e(entropy, (unsigned int) temp2, nbits);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
+  register int temp, temp2;
+  register int nbits;
+  register int r, k;
+  int Se, Al, max_coef_bits;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  Se = cinfo->Se;
+  Al = cinfo->Al;
+  natural_order = cinfo->natural_order;
+  max_coef_bits = cinfo->data_precision + 2;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
+  
+  r = 0;			/* r = run length of zeros */
+   
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = (*block)[natural_order[k]]) == 0) {
+      r++;
+      continue;
+    }
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value; so the code is
+     * interwoven with finding the abs value (temp) and output bits (temp2).
+     */
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* Apply the point transform, and watch out for case */
+      /* that nonzero coef is zero after point transform. */
+      if ((temp >>= Al) == 0) {
+	r++;
+	continue;
+      }
+      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
+      temp2 = ~temp;
+    } else {
+      /* Apply the point transform, and watch out for case */
+      /* that nonzero coef is zero after point transform. */
+      if ((temp >>= Al) == 0) {
+	r++;
+	continue;
+      }
+      temp2 = temp;
+    }
+
+    /* Emit any pending EOBRUN */
+    if (entropy->EOBRUN > 0)
+      emit_eobrun(entropy);
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+    while (r > 15) {
+      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    do nbits++;			/* there must be at least one 1 bit */
+    while ((temp >>= 1));
+    /* Check for out-of-range coefficient values */
+    if (nbits > max_coef_bits)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    emit_bits_e(entropy, (unsigned int) temp2, nbits);
+
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0) {			/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    if (entropy->EOBRUN == 0x7FFF)
+      emit_eobrun(entropy);	/* force it out to avoid overflow */
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int Al, blkn;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  Al = cinfo->Al;
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    emit_bits_e(entropy, (unsigned int) (MCU_data[blkn][0][0] >> Al), 1);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
+  register int temp;
+  register int r, k;
+  int Se, Al;
+  int EOB;
+  char *BR_buffer;
+  unsigned int BR;
+  int absvalues[DCTSIZE2];
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart_e(entropy, entropy->next_restart_num);
+
+  Se = cinfo->Se;
+  Al = cinfo->Al;
+  natural_order = cinfo->natural_order;
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+
+  /* It is convenient to make a pre-pass to determine the transformed
+   * coefficients' absolute values and the EOB position.
+   */
+  EOB = 0;
+  for (k = cinfo->Ss; k <= Se; k++) {
+    temp = (*block)[natural_order[k]];
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if (temp < 0)
+      temp = -temp;		/* temp is abs value of input */
+    temp >>= Al;		/* apply the point transform */
+    absvalues[k] = temp;	/* save abs value for main pass */
+    if (temp == 1)
+      EOB = k;			/* EOB = index of last newly-nonzero coef */
+  }
+
+  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
+  
+  r = 0;			/* r = run length of zeros */
+  BR = 0;			/* BR = count of buffered bits added now */
+  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
+
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = absvalues[k]) == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any required ZRLs, but not if they can be folded into EOB */
+    while (r > 15 && k <= EOB) {
+      /* emit any pending EOBRUN and the BE correction bits */
+      emit_eobrun(entropy);
+      /* Emit ZRL */
+      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+      /* Emit buffered correction bits that must be associated with ZRL */
+      emit_buffered_bits(entropy, BR_buffer, BR);
+      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+      BR = 0;
+    }
+
+    /* If the coef was previously nonzero, it only needs a correction bit.
+     * NOTE: a straight translation of the spec's figure G.7 would suggest
+     * that we also need to test r > 15.  But if r > 15, we can only get here
+     * if k > EOB, which implies that this coefficient is not 1.
+     */
+    if (temp > 1) {
+      /* The correction bit is the next bit of the absolute value. */
+      BR_buffer[BR++] = (char) (temp & 1);
+      continue;
+    }
+
+    /* Emit any pending EOBRUN and the BE correction bits */
+    emit_eobrun(entropy);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
+
+    /* Emit output bit for newly-nonzero coef */
+    temp = ((*block)[natural_order[k]] < 0) ? 0 : 1;
+    emit_bits_e(entropy, (unsigned int) temp, 1);
+
+    /* Emit buffered correction bits that must be associated with this code */
+    emit_buffered_bits(entropy, BR_buffer, BR);
+    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+    BR = 0;
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    entropy->BE += BR;		/* concat my correction bits to older ones */
+    /* We force out the EOB if we risk either:
+     * 1. overflow of the EOB counter;
+     * 2. overflow of the correction bit buffer during the next MCU.
+     */
+    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
+      emit_eobrun(entropy);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/* Encode a single block's worth of coefficients */
+
+LOCAL(boolean)
+encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
+		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
+{
+  register int temp, temp2;
+  register int nbits;
+  register int r, k;
+  int Se = state->cinfo->lim_Se;
+  int max_coef_bits = state->cinfo->data_precision + 3;
+  const int * natural_order = state->cinfo->natural_order;
+
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+
+  if ((temp = block[0] - last_dc_val) == 0) {
+    /* Emit the Huffman-coded symbol for the number of bits */
+    if (! emit_bits_s(state, dctbl->ehufco[0], dctbl->ehufsi[0]))
+      return FALSE;
+  } else {
+    if ((temp2 = temp) < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* For a negative input, want temp2 = bitwise complement of abs(input) */
+      /* This code assumes we are on a two's complement machine */
+      temp2--;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    do nbits++;			/* there must be at least one 1 bit */
+    while ((temp >>= 1));
+    /* Check for out-of-range coefficient values.
+     * Since we're encoding a difference, the range limit is twice as much.
+     */
+    if (nbits > max_coef_bits)
+      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+
+    /* Emit the Huffman-coded symbol for the number of bits */
+    if (! emit_bits_s(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
+      return FALSE;
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
+      return FALSE;
+  }
+
+  /* Encode the AC coefficients per section F.1.2.2 */
+
+  r = 0;			/* r = run length of zeros */
+
+  for (k = 1; k <= Se; k++) {
+    if ((temp = block[natural_order[k]]) == 0) {
+      r++;
+      continue;
+    }
+
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+    while (r > 15) {
+      if (! emit_bits_s(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
+	return FALSE;
+      r -= 16;
+    }
+
+    if ((temp2 = temp) < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
+      /* This code assumes we are on a two's complement machine */
+      temp2--;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    do nbits++;			/* there must be at least one 1 bit */
+    while ((temp >>= 1));
+    /* Check for out-of-range coefficient values.
+     * Use ">=" instead of ">" so can use the
+     * same one larger limit from DC check here.
+     */
+    if (nbits >= max_coef_bits)
+      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+
+    /* Emit Huffman symbol for run length / number of bits */
+    temp = (r << 4) + nbits;
+    if (! emit_bits_s(state, actbl->ehufco[temp], actbl->ehufsi[temp]))
+      return FALSE;
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
+      return FALSE;
+
+    r = 0;			/* reset zero run length */
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    if (! emit_bits_s(state, actbl->ehufco[0], actbl->ehufsi[0]))
+      return FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of Huffman-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu_huff (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Load up working state */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cinfo = cinfo;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! emit_restart_s(&state, entropy->next_restart_num))
+	return FALSE;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    if (! encode_one_block(&state,
+			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
+			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
+			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+      return FALSE;
+    /* Update last_dc_val */
+    state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  /* Completed MCU, so update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  ASSIGN_STATE(entropy->saved, state.cur);
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+
+  if (cinfo->progressive_mode) {
+    entropy->next_output_byte = cinfo->dest->next_output_byte;
+    entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+    /* Flush out any buffered data */
+    emit_eobrun(entropy);
+    flush_bits_e(entropy);
+
+    cinfo->dest->next_output_byte = entropy->next_output_byte;
+    cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+  } else {
+    /* Load up working state ... flush_bits needs it */
+    state.next_output_byte = cinfo->dest->next_output_byte;
+    state.free_in_buffer = cinfo->dest->free_in_buffer;
+    ASSIGN_STATE(state.cur, entropy->saved);
+    state.cinfo = cinfo;
+
+    /* Flush out the last data */
+    if (! flush_bits_s(&state))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+    /* Update state */
+    cinfo->dest->next_output_byte = state.next_output_byte;
+    cinfo->dest->free_in_buffer = state.free_in_buffer;
+    ASSIGN_STATE(entropy->saved, state.cur);
+  }
+}
+
+
+/*
+ * Huffman coding optimization.
+ *
+ * We first scan the supplied data and count the number of uses of each symbol
+ * that is to be Huffman-coded. (This process MUST agree with the code above.)
+ * Then we build a Huffman coding tree for the observed counts.
+ * Symbols which are not needed at all for the particular image are not
+ * assigned any code, which saves space in the DHT marker as well as in
+ * the compressed data.
+ */
+
+
+/* Process a single block's worth of coefficients */
+
+LOCAL(void)
+htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
+		 long dc_counts[], long ac_counts[])
+{
+  register int temp;
+  register int nbits;
+  register int r, k;
+  int Se = cinfo->lim_Se;
+  int max_coef_bits = cinfo->data_precision + 3;
+  const int * natural_order = cinfo->natural_order;
+
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+
+  if ((temp = block[0] - last_dc_val) == 0) {
+    /* Count the Huffman symbol for the number of bits */
+    dc_counts[0]++;
+  } else {
+    if (temp < 0)
+      temp = -temp;		/* temp is abs value of input */
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    do nbits++;			/* there must be at least one 1 bit */
+    while ((temp >>= 1));
+    /* Check for out-of-range coefficient values.
+     * Since we're encoding a difference, the range limit is twice as much.
+     */
+    if (nbits > max_coef_bits)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count the Huffman symbol for the number of bits */
+    dc_counts[nbits]++;
+  }
+
+  /* Encode the AC coefficients per section F.1.2.2 */
+
+  r = 0;			/* r = run length of zeros */
+
+  for (k = 1; k <= Se; k++) {
+    if ((temp = block[natural_order[k]]) == 0) {
+      r++;
+      continue;
+    }
+
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+    while (r > 15) {
+      ac_counts[0xF0]++;
+      r -= 16;
+    }
+
+    if (temp < 0)
+      temp = -temp;		/* temp is abs value of input */
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    do nbits++;			/* there must be at least one 1 bit */
+    while ((temp >>= 1));
+    /* Check for out-of-range coefficient values.
+     * Use ">=" instead of ">" so can use the
+     * same one larger limit from DC check here.
+     */
+    if (nbits >= max_coef_bits)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count Huffman symbol for run length / number of bits */
+    ac_counts[(r << 4) + nbits]++;
+
+    r = 0;			/* reset zero run length */
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    ac_counts[0]++;
+}
+
+
+/*
+ * Trial-encode one MCU's worth of Huffman-compressed coefficients.
+ * No data is actually output, so no suspension return is possible.
+ */
+
+METHODDEF(boolean)
+encode_mcu_gather (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Take care of restart intervals if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      /* Re-initialize DC predictions to 0 */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+	entropy->saved.last_dc_val[ci] = 0;
+      /* Update restart state */
+      entropy->restarts_to_go = cinfo->restart_interval;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
+		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
+		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Generate the best Huffman code table for the given counts, fill htbl.
+ *
+ * The JPEG standard requires that no symbol be assigned a codeword of all
+ * one bits (so that padding bits added at the end of a compressed segment
+ * can't look like a valid code).  Because of the canonical ordering of
+ * codewords, this just means that there must be an unused slot in the
+ * longest codeword length category.  Section K.2 of the JPEG spec suggests
+ * reserving such a slot by pretending that symbol 256 is a valid symbol
+ * with count 1.  In theory that's not optimal; giving it count zero but
+ * including it in the symbol set anyway should give a better Huffman code.
+ * But the theoretically better code actually seems to come out worse in
+ * practice, because it produces more all-ones bytes (which incur stuffed
+ * zero bytes in the final file).  In any case the difference is tiny.
+ *
+ * The JPEG standard requires Huffman codes to be no more than 16 bits long.
+ * If some symbols have a very small but nonzero probability, the Huffman tree
+ * must be adjusted to meet the code length restriction.  We currently use
+ * the adjustment method suggested in JPEG section K.2.  This method is *not*
+ * optimal; it may not choose the best possible limited-length code.  But
+ * typically only very-low-frequency symbols will be given less-than-optimal
+ * lengths, so the code is almost optimal.  Experimental comparisons against
+ * an optimal limited-length-code algorithm indicate that the difference is
+ * microscopic --- usually less than a hundredth of a percent of total size.
+ * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
+ */
+
+LOCAL(void)
+jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
+{
+#define MAX_CLEN 32		/* assumed maximum initial code length */
+  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
+  int codesize[257];		/* codesize[k] = code length of symbol k */
+  int others[257];		/* next symbol in current branch of tree */
+  int c1, c2, i, j;
+  UINT8 *p;
+  long v;
+
+  freq[256] = 1;		/* make sure 256 has a nonzero count */
+  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
+   * that no real symbol is given code-value of all ones, because 256
+   * will be placed last in the largest codeword category.
+   * In the symbol list build procedure this element serves as sentinel
+   * for the zero run loop.
+   */
+
+#ifndef DONT_USE_FANCY_HUFF_OPT
+
+  /* Build list of symbols sorted in order of descending frequency */
+  /* This approach has several benefits (thank to John Korejwa for the idea):
+   *     1.
+   * If a codelength category is split during the length limiting procedure
+   * below, the feature that more frequent symbols are assigned shorter
+   * codewords remains valid for the adjusted code.
+   *     2.
+   * To reduce consecutive ones in a Huffman data stream (thus reducing the
+   * number of stuff bytes in JPEG) it is preferable to follow 0 branches
+   * (and avoid 1 branches) as much as possible.  This is easily done by
+   * assigning symbols to leaves of the Huffman tree in order of decreasing
+   * frequency, with no secondary sort based on codelengths.
+   *     3.
+   * The symbol list can be built independently from the assignment of code
+   * lengths by the Huffman procedure below.
+   * Note: The symbol list build procedure must be performed first, because
+   * the Huffman procedure assigning the codelengths clobbers the frequency
+   * counts!
+   */
+
+  /* Here we use the others array as a linked list of nonzero frequencies
+   * to be sorted.  Already sorted elements are removed from the list.
+   */
+
+  /* Building list */
+
+  /* This item does not correspond to a valid symbol frequency and is used
+   * as starting index.
+   */
+  j = 256;
+
+  for (i = 0;; i++) {
+    if (freq[i] == 0)		/* skip zero frequencies */
+      continue;
+    if (i > 255)
+      break;
+    others[j] = i;		/* this symbol value */
+    j = i;			/* previous symbol value */
+  }
+  others[j] = -1;		/* mark end of list */
+
+  /* Sorting list */
+
+  p = htbl->huffval;
+  while ((c1 = others[256]) >= 0) {
+    v = freq[c1];
+    i = c1;			/* first symbol value */
+    j = 256;			/* pseudo symbol value for starting index */
+    while ((c2 = others[c1]) >= 0) {
+      if (freq[c2] > v) {
+	v = freq[c2];
+	i = c2;			/* this symbol value */
+	j = c1;			/* previous symbol value */
+      }
+      c1 = c2;
+    }
+    others[j] = others[i];	/* remove this symbol i from list */
+    *p++ = (UINT8) i;
+  }
+
+#endif /* DONT_USE_FANCY_HUFF_OPT */
+
+  /* This algorithm is explained in section K.2 of the JPEG standard */
+
+  MEMZERO(bits, SIZEOF(bits));
+  MEMZERO(codesize, SIZEOF(codesize));
+  for (i = 0; i < 257; i++)
+    others[i] = -1;		/* init links to empty */
+
+  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
+
+  for (;;) {
+    /* Find the smallest nonzero frequency, set c1 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c1 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v) {
+	v = freq[i];
+	c1 = i;
+      }
+    }
+
+    /* Find the next smallest nonzero frequency, set c2 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c2 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v && i != c1) {
+	v = freq[i];
+	c2 = i;
+      }
+    }
+
+    /* Done if we've merged everything into one frequency */
+    if (c2 < 0)
+      break;
+
+    /* Else merge the two counts/trees */
+    freq[c1] += freq[c2];
+    freq[c2] = 0;
+
+    /* Increment the codesize of everything in c1's tree branch */
+    codesize[c1]++;
+    while (others[c1] >= 0) {
+      c1 = others[c1];
+      codesize[c1]++;
+    }
+
+    others[c1] = c2;		/* chain c2 onto c1's tree branch */
+
+    /* Increment the codesize of everything in c2's tree branch */
+    codesize[c2]++;
+    while (others[c2] >= 0) {
+      c2 = others[c2];
+      codesize[c2]++;
+    }
+  }
+
+  /* Now count the number of symbols of each code length */
+  for (i = 0; i <= 256; i++) {
+    if (codesize[i]) {
+      /* The JPEG standard seems to think that this can't happen, */
+      /* but I'm paranoid... */
+      if (codesize[i] > MAX_CLEN)
+	ERREXIT(cinfo, JERR_HUFF_CLEN_OUTOFBOUNDS);
+
+      bits[codesize[i]]++;
+    }
+  }
+
+  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
+   * Huffman procedure assigned any such lengths, we must adjust the coding.
+   * Here is what the JPEG spec says about how this next bit works:
+   * Since symbols are paired for the longest Huffman code, the symbols are
+   * removed from this length category two at a time.  The prefix for the pair
+   * (which is one bit shorter) is allocated to one of the pair; then,
+   * skipping the BITS entry for that prefix length, a code word from the next
+   * shortest nonzero BITS entry is converted into a prefix for two code words
+   * one bit longer.
+   */
+
+  for (i = MAX_CLEN; i > 16; i--) {
+    while (bits[i] > 0) {
+      j = i - 2;		/* find length of new prefix to be used */
+      while (bits[j] == 0) {
+	if (j == 0)
+	  ERREXIT(cinfo, JERR_HUFF_CLEN_OUTOFBOUNDS);
+	j--;
+      }
+
+      bits[i] -= 2;		/* remove two symbols */
+      bits[i-1]++;		/* one goes in this length */
+      bits[j+1] += 2;		/* two new symbols in this length */
+      bits[j]--;		/* symbol of this length is now a prefix */
+    }
+  }
+
+  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
+  while (bits[i] == 0)		/* find largest codelength still in use */
+    i--;
+  bits[i]--;
+
+  /* Return final symbol counts (only for lengths 0..16) */
+  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
+
+#ifdef DONT_USE_FANCY_HUFF_OPT
+
+  /* Return a list of the symbols sorted by code length */
+  /* Note: Due to the codelength changes made above, it can happen
+   * that more frequent symbols are assigned longer codewords.
+   */
+  p = htbl->huffval;
+  for (i = 1; i <= MAX_CLEN; i++) {
+    for (j = 0; j <= 255; j++) {
+      if (codesize[j] == i) {
+	*p++ = (UINT8) j;
+      }
+    }
+  }
+
+#endif /* DONT_USE_FANCY_HUFF_OPT */
+
+  /* Set sent_table FALSE so updated table will be written to JPEG file. */
+  htbl->sent_table = FALSE;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+  JHUFF_TBL **htblptr;
+  boolean did_dc[NUM_HUFF_TBLS];
+  boolean did_ac[NUM_HUFF_TBLS];
+
+  if (cinfo->progressive_mode)
+    /* Flush out buffered data (all we care about is counting the EOB symbol) */
+    emit_eobrun(entropy);
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  MEMZERO(did_dc, SIZEOF(did_dc));
+  MEMZERO(did_ac, SIZEOF(did_ac));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      tbl = compptr->dc_tbl_no;
+      if (! did_dc[tbl]) {
+	htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
+	if (*htblptr == NULL)
+	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[tbl]);
+	did_dc[tbl] = TRUE;
+      }
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (! did_ac[tbl]) {
+	htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
+	if (*htblptr == NULL)
+	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[tbl]);
+	did_ac[tbl] = TRUE;
+      }
+    }
+  }
+}
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ * If gather_statistics is TRUE, we do not output anything during the scan,
+ * just count the Huffman symbols used and generate Huffman code tables.
+ */
+
+METHODDEF(void)
+start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (gather_statistics)
+    entropy->pub.finish_pass = finish_pass_gather;
+  else
+    entropy->pub.finish_pass = finish_pass_huff;
+
+  if (cinfo->progressive_mode) {
+    entropy->cinfo = cinfo;
+    entropy->gather_statistics = gather_statistics;
+
+    /* We assume jcmaster.c already validated the scan parameters. */
+
+    /* Select execution routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_first;
+      else
+	entropy->pub.encode_mcu = encode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.encode_mcu = encode_mcu_DC_refine;
+      else {
+	entropy->pub.encode_mcu = encode_mcu_AC_refine;
+	/* AC refinement needs a correction bit buffer */
+	if (entropy->bit_buffer == NULL)
+	  entropy->bit_buffer = (char *) (*cinfo->mem->alloc_small)
+	    ((j_common_ptr) cinfo, JPOOL_IMAGE, MAX_CORR_BITS * SIZEOF(char));
+      }
+    }
+
+    /* Initialize AC stuff */
+    entropy->ac_tbl_no = cinfo->cur_comp_info[0]->ac_tbl_no;
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  } else {
+    if (gather_statistics)
+      entropy->pub.encode_mcu = encode_mcu_gather;
+    else
+      entropy->pub.encode_mcu = encode_mcu_huff;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
+      tbl = compptr->dc_tbl_no;
+      if (gather_statistics) {
+	/* Check for invalid table index */
+	/* (make_c_derived_tbl does this in the other path) */
+	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+	/* Allocate and zero the statistics tables */
+	/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+	if (entropy->dc_count_ptrs[tbl] == NULL)
+	  entropy->dc_count_ptrs[tbl] = (long *) (*cinfo->mem->alloc_small)
+	    ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long));
+	MEMZERO(entropy->dc_count_ptrs[tbl], 257 * SIZEOF(long));
+      } else {
+	/* Compute derived values for Huffman tables */
+	/* We may do this more than once for a table, but it's not expensive */
+	jpeg_make_c_derived_tbl(cinfo, TRUE, tbl,
+				& entropy->dc_derived_tbls[tbl]);
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (gather_statistics) {
+	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+	if (entropy->ac_count_ptrs[tbl] == NULL)
+	  entropy->ac_count_ptrs[tbl] = (long *) (*cinfo->mem->alloc_small)
+	    ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long));
+	MEMZERO(entropy->ac_count_ptrs[tbl], 257 * SIZEOF(long));
+      } else {
+	jpeg_make_c_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->ac_derived_tbls[tbl]);
+      }
+    }
+  }
+
+  /* Initialize bit buffer to empty */
+  entropy->saved.put_buffer = 0;
+  entropy->saved.put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_huff_encoder (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(huff_entropy_encoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass_huff;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+    entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
+  }
+
+  if (cinfo->progressive_mode)
+    entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
+}
diff --git a/cmake/externals/libjpeg/jcinit.c b/cmake/externals/libjpeg/jcinit.c
new file mode 100644
index 000000000..2aea7ca2a
--- /dev/null
+++ b/cmake/externals/libjpeg/jcinit.c
@@ -0,0 +1,249 @@
+/*
+ * jcinit.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2003-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains initialization logic for the JPEG compressor.
+ * This routine is in charge of selecting the modules to be executed and
+ * making an initialization call to each one.
+ *
+ * Logically, this code belongs in jcmaster.c.  It's split out because
+ * linking this routine implies linking the entire compression library.
+ * For a transcoding-only application, we want to be able to use jcmaster.c
+ * without linking in the whole library.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Compute JPEG image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+  /* Sanity check on input image dimensions to prevent overflow in
+   * following calculations.
+   * We do check jpeg_width and jpeg_height in initial_setup in jcmaster.c,
+   * but image_width and image_height can come from arbitrary data,
+   * and we need some space for multiplication by block_size.
+   */
+  if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24))
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+#ifdef DCT_SCALING_SUPPORTED
+
+  /* Compute actual JPEG image dimensions and DCT scaling choices. */
+  if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/1 scaling */
+    cinfo->jpeg_width = cinfo->image_width * cinfo->block_size;
+    cinfo->jpeg_height = cinfo->image_height * cinfo->block_size;
+    cinfo->min_DCT_h_scaled_size = 1;
+    cinfo->min_DCT_v_scaled_size = 1;
+  } else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/2 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L);
+    cinfo->min_DCT_h_scaled_size = 2;
+    cinfo->min_DCT_v_scaled_size = 2;
+  } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/3 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L);
+    cinfo->min_DCT_h_scaled_size = 3;
+    cinfo->min_DCT_v_scaled_size = 3;
+  } else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/4 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L);
+    cinfo->min_DCT_h_scaled_size = 4;
+    cinfo->min_DCT_v_scaled_size = 4;
+  } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/5 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L);
+    cinfo->min_DCT_h_scaled_size = 5;
+    cinfo->min_DCT_v_scaled_size = 5;
+  } else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/6 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L);
+    cinfo->min_DCT_h_scaled_size = 6;
+    cinfo->min_DCT_v_scaled_size = 6;
+  } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/7 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L);
+    cinfo->min_DCT_h_scaled_size = 7;
+    cinfo->min_DCT_v_scaled_size = 7;
+  } else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/8 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L);
+    cinfo->min_DCT_h_scaled_size = 8;
+    cinfo->min_DCT_v_scaled_size = 8;
+  } else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/9 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L);
+    cinfo->min_DCT_h_scaled_size = 9;
+    cinfo->min_DCT_v_scaled_size = 9;
+  } else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/10 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L);
+    cinfo->min_DCT_h_scaled_size = 10;
+    cinfo->min_DCT_v_scaled_size = 10;
+  } else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/11 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L);
+    cinfo->min_DCT_h_scaled_size = 11;
+    cinfo->min_DCT_v_scaled_size = 11;
+  } else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/12 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L);
+    cinfo->min_DCT_h_scaled_size = 12;
+    cinfo->min_DCT_v_scaled_size = 12;
+  } else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/13 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L);
+    cinfo->min_DCT_h_scaled_size = 13;
+    cinfo->min_DCT_v_scaled_size = 13;
+  } else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/14 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L);
+    cinfo->min_DCT_h_scaled_size = 14;
+    cinfo->min_DCT_v_scaled_size = 14;
+  } else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) {
+    /* Provide block_size/15 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L);
+    cinfo->min_DCT_h_scaled_size = 15;
+    cinfo->min_DCT_v_scaled_size = 15;
+  } else {
+    /* Provide block_size/16 scaling */
+    cinfo->jpeg_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L);
+    cinfo->jpeg_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L);
+    cinfo->min_DCT_h_scaled_size = 16;
+    cinfo->min_DCT_v_scaled_size = 16;
+  }
+
+#else /* !DCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->jpeg_width = cinfo->image_width;
+  cinfo->jpeg_height = cinfo->image_height;
+  cinfo->min_DCT_h_scaled_size = DCTSIZE;
+  cinfo->min_DCT_v_scaled_size = DCTSIZE;
+
+#endif /* DCT_SCALING_SUPPORTED */
+}
+
+
+/*
+ * Master selection of compression modules.
+ * This is done once at the start of processing an image.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ */
+
+GLOBAL(void)
+jinit_compress_master (j_compress_ptr cinfo)
+{
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Sanity check on input image dimensions */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Compute JPEG image dimensions and related values. */
+  jpeg_calc_jpeg_dimensions(cinfo);
+
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, FALSE /* full compression */);
+
+  /* Preprocessing */
+  if (! cinfo->raw_data_in) {
+    jinit_color_converter(cinfo);
+    jinit_downsampler(cinfo);
+    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
+  }
+  /* Forward DCT */
+  jinit_forward_dct(cinfo);
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_encoder(cinfo);
+  else {
+    jinit_huff_encoder(cinfo);
+  }
+
+  /* Need a full-image coefficient buffer in any multi-pass mode. */
+  jinit_c_coef_controller(cinfo,
+		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
+  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
diff --git a/cmake/externals/libjpeg/jcmainct.c b/cmake/externals/libjpeg/jcmainct.c
new file mode 100644
index 000000000..39b97902e
--- /dev/null
+++ b/cmake/externals/libjpeg/jcmainct.c
@@ -0,0 +1,297 @@
+/*
+ * jcmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2012 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for compression.
+ * The main buffer lies between the pre-processor and the JPEG
+ * compressor proper; it holds downsampled data in the JPEG colorspace.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Note: currently, there is no operating mode in which a full-image buffer
+ * is needed at this step.  If there were, that mode could not be used with
+ * "raw data" input, since this module is bypassed in that case.  However,
+ * we've left the code here for possible use in special applications.
+ */
+#undef FULL_MAIN_BUFFER_SUPPORTED
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_main_controller pub; /* public fields */
+
+  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
+  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
+  boolean suspended;		/* remember if we suspended output */
+  J_BUF_MODE pass_mode;		/* current operating mode */
+
+  /* If using just a strip buffer, this points to the entire set of buffers
+   * (we allocate one for each component).  In the full-image case, this
+   * points to the currently accessible strips of the virtual arrays.
+   */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  /* If using full-image storage, this array holds pointers to virtual-array
+   * control blocks for each component.  Unused if not full-image storage.
+   */
+  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
+#endif
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+METHODDEF(void) process_data_buffer_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Do nothing in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  mainp->cur_iMCU_row = 0;	/* initialize counters */
+  mainp->rowgroup_ctr = 0;
+  mainp->suspended = FALSE;
+  mainp->pass_mode = pass_mode;	/* save mode for use by process_data */
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    if (mainp->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+    mainp->pub.process_data = process_data_simple_main;
+    break;
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  case JBUF_SAVE_SOURCE:
+  case JBUF_CRANK_DEST:
+  case JBUF_SAVE_AND_PASS:
+    if (mainp->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    mainp->pub.process_data = process_data_buffer_main;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This routine handles the simple pass-through mode,
+ * where we have only a strip buffer.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Read input data if we haven't filled the main buffer yet */
+    if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
+
+    /* If we don't have a full iMCU row buffered, return to application for
+     * more data.  Note that preprocessor will always pad to fill the iMCU row
+     * at the bottom of the image.
+     */
+    if (mainp->rowgroup_ctr != (JDIMENSION) cinfo->min_DCT_v_scaled_size)
+      return;
+
+    /* Send the completed row to the compressor */
+    if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+      /* If compressor did not consume the whole row, then we must need to
+       * suspend processing and return to the application.  In this situation
+       * we pretend we didn't yet consume the last input row; otherwise, if
+       * it happened to be the last row of the image, the application would
+       * think we were done.
+       */
+      if (! mainp->suspended) {
+	(*in_row_ctr)--;
+	mainp->suspended = TRUE;
+      }
+      return;
+    }
+    /* We did finish the row.  Undo our little suspension hack if a previous
+     * call suspended; then mark the main buffer empty.
+     */
+    if (mainp->suspended) {
+      (*in_row_ctr)++;
+      mainp->suspended = FALSE;
+    }
+    mainp->rowgroup_ctr = 0;
+    mainp->cur_iMCU_row++;
+  }
+}
+
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+
+/*
+ * Process some data.
+ * This routine handles all of the modes that use a full-size buffer.
+ */
+
+METHODDEF(void)
+process_data_buffer_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci;
+  jpeg_component_info *compptr;
+  boolean writing = (mainp->pass_mode != JBUF_CRANK_DEST);
+
+  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Realign the virtual buffers if at the start of an iMCU row. */
+    if (mainp->rowgroup_ctr == 0) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	mainp->buffer[ci] = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, mainp->whole_image[ci], mainp->cur_iMCU_row *
+	   ((JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size)),
+	   (JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size),
+	   writing);
+      }
+      /* In a read pass, pretend we just read some source data. */
+      if (! writing) {
+	*in_row_ctr += (JDIMENSION)
+	  (cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size);
+	mainp->rowgroup_ctr = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
+      }
+    }
+
+    /* If a write pass, read input data until the current iMCU row is full. */
+    /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
+    if (writing) {
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
+      /* Return to application if we need more data to fill the iMCU row. */
+      if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
+	return;
+    }
+
+    /* Emit data, unless this is a sink-only pass. */
+    if (mainp->pass_mode != JBUF_SAVE_SOURCE) {
+      if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+	/* If compressor did not consume the whole row, then we must need to
+	 * suspend processing and return to the application.  In this situation
+	 * we pretend we didn't yet consume the last input row; otherwise, if
+	 * it happened to be the last row of the image, the application would
+	 * think we were done.
+	 */
+	if (! mainp->suspended) {
+	  (*in_row_ctr)--;
+	  mainp->suspended = TRUE;
+	}
+	return;
+      }
+      /* We did finish the row.  Undo our little suspension hack if a previous
+       * call suspended; then mark the main buffer empty.
+       */
+      if (mainp->suspended) {
+	(*in_row_ctr)++;
+	mainp->suspended = FALSE;
+      }
+    }
+
+    /* If get here, we are done with this iMCU row.  Mark buffer empty. */
+    mainp->rowgroup_ctr = 0;
+    mainp->cur_iMCU_row++;
+  }
+}
+
+#endif /* FULL_MAIN_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr mainp;
+  int ci;
+  jpeg_component_info *compptr;
+
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  cinfo->main = &mainp->pub;
+  mainp->pub.start_pass = start_pass_main;
+
+  /* We don't need to create a buffer in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  /* Create the buffer.  It holds downsampled data, so each component
+   * may be of a different size.
+   */
+  if (need_full_buffer) {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component */
+    /* Note we pad the bottom to a multiple of the iMCU height */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      mainp->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
+	 ((JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				 (long) compptr->v_samp_factor)) *
+	 ((JDIMENSION) cinfo->min_DCT_v_scaled_size),
+	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    mainp->whole_image[0] = NULL; /* flag for no virtual arrays */
+#endif
+    /* Allocate a strip buffer for each component */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
+	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
+    }
+  }
+}
diff --git a/cmake/externals/libjpeg/jcmarker.c b/cmake/externals/libjpeg/jcmarker.c
new file mode 100644
index 000000000..8874cd867
--- /dev/null
+++ b/cmake/externals/libjpeg/jcmarker.c
@@ -0,0 +1,717 @@
+/*
+ * jcmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2003-2019 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to write JPEG datastream markers.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+
+  M_DHT   = 0xc4,
+
+  M_DAC   = 0xcc,
+
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+
+  M_JPG0  = 0xf0,
+  M_JPG8  = 0xf8,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+
+  M_TEM   = 0x01,
+
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_writer pub; /* public fields */
+
+  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
+} my_marker_writer;
+
+typedef my_marker_writer * my_marker_ptr;
+
+
+/*
+ * Basic output routines.
+ *
+ * Note that we do not support suspension while writing a marker.
+ * Therefore, an application using suspension must ensure that there is
+ * enough buffer space for the initial markers (typ. 600-700 bytes) before
+ * calling jpeg_start_compress, and enough space to write the trailing EOI
+ * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
+ * modes are not supported at all with suspension, so those two are the only
+ * points where markers will be written.
+ */
+
+LOCAL(void)
+emit_byte (j_compress_ptr cinfo, int val)
+/* Emit a byte */
+{
+  struct jpeg_destination_mgr * dest = cinfo->dest;
+
+  *(dest->next_output_byte)++ = (JOCTET) val;
+  if (--dest->free_in_buffer == 0) {
+    if (! (*dest->empty_output_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  }
+}
+
+
+LOCAL(void)
+emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
+/* Emit a marker code */
+{
+  emit_byte(cinfo, 0xFF);
+  emit_byte(cinfo, (int) mark);
+}
+
+
+LOCAL(void)
+emit_2bytes (j_compress_ptr cinfo, int value)
+/* Emit a 2-byte integer; these are always MSB first in JPEG files */
+{
+  emit_byte(cinfo, (value >> 8) & 0xFF);
+  emit_byte(cinfo, value & 0xFF);
+}
+
+
+/*
+ * Routines to write specific marker types.
+ */
+
+LOCAL(int)
+emit_dqt (j_compress_ptr cinfo, int index)
+/* Emit a DQT marker */
+/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
+{
+  JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
+  int prec;
+  int i;
+
+  if (qtbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
+
+  prec = 0;
+  for (i = 0; i <= cinfo->lim_Se; i++) {
+    if (qtbl->quantval[cinfo->natural_order[i]] > 255)
+      prec = 1;
+  }
+
+  if (! qtbl->sent_table) {
+    emit_marker(cinfo, M_DQT);
+
+    emit_2bytes(cinfo,
+      prec ? cinfo->lim_Se * 2 + 2 + 1 + 2 : cinfo->lim_Se + 1 + 1 + 2);
+
+    emit_byte(cinfo, index + (prec<<4));
+
+    for (i = 0; i <= cinfo->lim_Se; i++) {
+      /* The table entries must be emitted in zigzag order. */
+      unsigned int qval = qtbl->quantval[cinfo->natural_order[i]];
+      if (prec)
+	emit_byte(cinfo, (int) (qval >> 8));
+      emit_byte(cinfo, (int) (qval & 0xFF));
+    }
+
+    qtbl->sent_table = TRUE;
+  }
+
+  return prec;
+}
+
+
+LOCAL(void)
+emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
+/* Emit a DHT marker */
+{
+  JHUFF_TBL * htbl;
+  int length, i;
+  
+  if (is_ac) {
+    htbl = cinfo->ac_huff_tbl_ptrs[index];
+    index += 0x10;		/* output index has AC bit set */
+  } else {
+    htbl = cinfo->dc_huff_tbl_ptrs[index];
+  }
+
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
+  
+  if (! htbl->sent_table) {
+    emit_marker(cinfo, M_DHT);
+    
+    length = 0;
+    for (i = 1; i <= 16; i++)
+      length += htbl->bits[i];
+    
+    emit_2bytes(cinfo, length + 2 + 1 + 16);
+    emit_byte(cinfo, index);
+    
+    for (i = 1; i <= 16; i++)
+      emit_byte(cinfo, htbl->bits[i]);
+    
+    for (i = 0; i < length; i++)
+      emit_byte(cinfo, htbl->huffval[i]);
+    
+    htbl->sent_table = TRUE;
+  }
+}
+
+
+LOCAL(void)
+emit_dac (j_compress_ptr cinfo)
+/* Emit a DAC marker */
+/* Since the useful info is so small, we want to emit all the tables in */
+/* one DAC marker.  Therefore this routine does its own scan of the table. */
+{
+#ifdef C_ARITH_CODING_SUPPORTED
+  char dc_in_use[NUM_ARITH_TBLS];
+  char ac_in_use[NUM_ARITH_TBLS];
+  int length, i;
+  jpeg_component_info *compptr;
+
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    dc_in_use[i] = ac_in_use[i] = 0;
+
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0)
+      dc_in_use[compptr->dc_tbl_no] = 1;
+    /* AC needs no table when not present */
+    if (cinfo->Se)
+      ac_in_use[compptr->ac_tbl_no] = 1;
+  }
+
+  length = 0;
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    length += dc_in_use[i] + ac_in_use[i];
+
+  if (length) {
+    emit_marker(cinfo, M_DAC);
+
+    emit_2bytes(cinfo, length*2 + 2);
+
+    for (i = 0; i < NUM_ARITH_TBLS; i++) {
+      if (dc_in_use[i]) {
+	emit_byte(cinfo, i);
+	emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
+      }
+      if (ac_in_use[i]) {
+	emit_byte(cinfo, i + 0x10);
+	emit_byte(cinfo, cinfo->arith_ac_K[i]);
+      }
+    }
+  }
+#endif /* C_ARITH_CODING_SUPPORTED */
+}
+
+
+LOCAL(void)
+emit_dri (j_compress_ptr cinfo)
+/* Emit a DRI marker */
+{
+  emit_marker(cinfo, M_DRI);
+  
+  emit_2bytes(cinfo, 4);	/* fixed length */
+
+  emit_2bytes(cinfo, (int) cinfo->restart_interval);
+}
+
+
+LOCAL(void)
+emit_lse_ict (j_compress_ptr cinfo)
+/* Emit an LSE inverse color transform specification marker */
+{
+  /* Support only 1 transform */
+  if (cinfo->color_transform != JCT_SUBTRACT_GREEN ||
+      cinfo->num_components < 3)
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+
+  emit_marker(cinfo, M_JPG8);
+  
+  emit_2bytes(cinfo, 24);	/* fixed length */
+
+  emit_byte(cinfo, 0x0D);	/* ID inverse transform specification */
+  emit_2bytes(cinfo, MAXJSAMPLE);	/* MAXTRANS */
+  emit_byte(cinfo, 3);		/* Nt=3 */
+  emit_byte(cinfo, cinfo->comp_info[1].component_id);
+  emit_byte(cinfo, cinfo->comp_info[0].component_id);
+  emit_byte(cinfo, cinfo->comp_info[2].component_id);
+  emit_byte(cinfo, 0x80);	/* F1: CENTER1=1, NORM1=0 */
+  emit_2bytes(cinfo, 0);	/* A(1,1)=0 */
+  emit_2bytes(cinfo, 0);	/* A(1,2)=0 */
+  emit_byte(cinfo, 0);		/* F2: CENTER2=0, NORM2=0 */
+  emit_2bytes(cinfo, 1);	/* A(2,1)=1 */
+  emit_2bytes(cinfo, 0);	/* A(2,2)=0 */
+  emit_byte(cinfo, 0);		/* F3: CENTER3=0, NORM3=0 */
+  emit_2bytes(cinfo, 1);	/* A(3,1)=1 */
+  emit_2bytes(cinfo, 0);	/* A(3,2)=0 */
+}
+
+
+LOCAL(void)
+emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
+/* Emit a SOF marker */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, code);
+  
+  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
+
+  /* Make sure image isn't bigger than SOF field can handle */
+  if ((long) cinfo->jpeg_height > 65535L ||
+      (long) cinfo->jpeg_width > 65535L)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
+
+  emit_byte(cinfo, cinfo->data_precision);
+  emit_2bytes(cinfo, (int) cinfo->jpeg_height);
+  emit_2bytes(cinfo, (int) cinfo->jpeg_width);
+
+  emit_byte(cinfo, cinfo->num_components);
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    emit_byte(cinfo, compptr->component_id);
+    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
+    emit_byte(cinfo, compptr->quant_tbl_no);
+  }
+}
+
+
+LOCAL(void)
+emit_sos (j_compress_ptr cinfo)
+/* Emit a SOS marker */
+{
+  int i, td, ta;
+  jpeg_component_info *compptr;
+  
+  emit_marker(cinfo, M_SOS);
+  
+  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
+  
+  emit_byte(cinfo, cinfo->comps_in_scan);
+  
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    emit_byte(cinfo, compptr->component_id);
+
+    /* We emit 0 for unused field(s); this is recommended by the P&M text
+     * but does not seem to be specified in the standard.
+     */
+
+    /* DC needs no table for refinement scan */
+    td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
+    /* AC needs no table when not present */
+    ta = cinfo->Se ? compptr->ac_tbl_no : 0;
+
+    emit_byte(cinfo, (td << 4) + ta);
+  }
+
+  emit_byte(cinfo, cinfo->Ss);
+  emit_byte(cinfo, cinfo->Se);
+  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
+}
+
+
+LOCAL(void)
+emit_pseudo_sos (j_compress_ptr cinfo)
+/* Emit a pseudo SOS marker */
+{
+  emit_marker(cinfo, M_SOS);
+  
+  emit_2bytes(cinfo, 2 + 1 + 3); /* length */
+  
+  emit_byte(cinfo, 0); /* Ns */
+
+  emit_byte(cinfo, 0); /* Ss */
+  emit_byte(cinfo, cinfo->block_size * cinfo->block_size - 1); /* Se */
+  emit_byte(cinfo, 0); /* Ah/Al */
+}
+
+
+LOCAL(void)
+emit_jfif_app0 (j_compress_ptr cinfo)
+/* Emit a JFIF-compliant APP0 marker */
+{
+  /*
+   * Length of APP0 block	(2 bytes)
+   * Block ID			(4 bytes - ASCII "JFIF")
+   * Zero byte			(1 byte to terminate the ID string)
+   * Version Major, Minor	(2 bytes - major first)
+   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu			(2 bytes - dots per unit horizontal)
+   * Ydpu			(2 bytes - dots per unit vertical)
+   * Thumbnail X size		(1 byte)
+   * Thumbnail Y size		(1 byte)
+   */
+  
+  emit_marker(cinfo, M_APP0);
+  
+  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
+
+  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0x49);
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0);
+  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
+  emit_byte(cinfo, cinfo->JFIF_minor_version);
+  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
+  emit_2bytes(cinfo, (int) cinfo->X_density);
+  emit_2bytes(cinfo, (int) cinfo->Y_density);
+  emit_byte(cinfo, 0);		/* No thumbnail image */
+  emit_byte(cinfo, 0);
+}
+
+
+LOCAL(void)
+emit_adobe_app14 (j_compress_ptr cinfo)
+/* Emit an Adobe APP14 marker */
+{
+  /*
+   * Length of APP14 block	(2 bytes)
+   * Block ID			(5 bytes - ASCII "Adobe")
+   * Version Number		(2 bytes - currently 100)
+   * Flags0			(2 bytes - currently 0)
+   * Flags1			(2 bytes - currently 0)
+   * Color transform		(1 byte)
+   *
+   * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
+   * now in circulation seem to use Version = 100, so that's what we write.
+   *
+   * We write the color transform byte as 1 if the JPEG color space is
+   * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
+   * whether the encoder performed a transformation, which is pretty useless.
+   */
+  
+  emit_marker(cinfo, M_APP14);
+  
+  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
+
+  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
+  emit_byte(cinfo, 0x64);
+  emit_byte(cinfo, 0x6F);
+  emit_byte(cinfo, 0x62);
+  emit_byte(cinfo, 0x65);
+  emit_2bytes(cinfo, 100);	/* Version */
+  emit_2bytes(cinfo, 0);	/* Flags0 */
+  emit_2bytes(cinfo, 0);	/* Flags1 */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_YCbCr:
+    emit_byte(cinfo, 1);	/* Color transform = 1 */
+    break;
+  case JCS_YCCK:
+    emit_byte(cinfo, 2);	/* Color transform = 2 */
+    break;
+  default:
+    emit_byte(cinfo, 0);	/* Color transform = 0 */
+  }
+}
+
+
+/*
+ * These routines allow writing an arbitrary marker with parameters.
+ * The only intended use is to emit COM or APPn markers after calling
+ * write_file_header and before calling write_frame_header.
+ * Other uses are not guaranteed to produce desirable results.
+ * Counting the parameter bytes properly is the caller's responsibility.
+ */
+
+METHODDEF(void)
+write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+/* Emit an arbitrary marker header */
+{
+  if (datalen > (unsigned int) 65533)		/* safety check */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  emit_marker(cinfo, (JPEG_MARKER) marker);
+
+  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
+}
+
+METHODDEF(void)
+write_marker_byte (j_compress_ptr cinfo, int val)
+/* Emit one byte of marker parameters following write_marker_header */
+{
+  emit_byte(cinfo, val);
+}
+
+
+/*
+ * Write datastream header.
+ * This consists of an SOI and optional APPn markers.
+ * We recommend use of the JFIF marker, but not the Adobe marker,
+ * when using YCbCr or grayscale data.  The JFIF marker is also used
+ * for other standard JPEG colorspaces.  The Adobe marker is helpful
+ * to distinguish RGB, CMYK, and YCCK colorspaces.
+ * Note that an application can write additional header markers after
+ * jpeg_start_compress returns.
+ */
+
+METHODDEF(void)
+write_file_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  emit_marker(cinfo, M_SOI);	/* first the SOI */
+
+  /* SOI is defined to reset restart interval to 0 */
+  marker->last_restart_interval = 0;
+
+  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
+    emit_jfif_app0(cinfo);
+  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
+    emit_adobe_app14(cinfo);
+}
+
+
+/*
+ * Write frame header.
+ * This consists of DQT and SOFn markers,
+ * a conditional LSE marker and a conditional pseudo SOS marker.
+ * Note that we do not emit the SOF until we have emitted the DQT(s).
+ * This avoids compatibility problems with incorrect implementations that
+ * try to error-check the quant table numbers as soon as they see the SOF.
+ */
+
+METHODDEF(void)
+write_frame_header (j_compress_ptr cinfo)
+{
+  int ci, prec;
+  boolean is_baseline;
+  jpeg_component_info *compptr;
+  
+  /* Emit DQT for each quantization table.
+   * Note that emit_dqt() suppresses any duplicate tables.
+   */
+  prec = 0;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+  }
+  /* now prec is nonzero iff there are any 16-bit quant tables. */
+
+  /* Check for a non-baseline specification.
+   * Note we assume that Huffman table numbers won't be changed later.
+   */
+  if (cinfo->arith_code || cinfo->progressive_mode ||
+      cinfo->data_precision != 8 || cinfo->block_size != DCTSIZE) {
+    is_baseline = FALSE;
+  } else {
+    is_baseline = TRUE;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
+	is_baseline = FALSE;
+    }
+    if (prec && is_baseline) {
+      is_baseline = FALSE;
+      /* If it's baseline except for quantizer size, warn the user */
+      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
+    }
+  }
+
+  /* Emit the proper SOF marker */
+  if (cinfo->arith_code) {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
+    else
+      emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
+  } else {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
+    else if (is_baseline)
+      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
+    else
+      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
+  }
+
+  /* Check to emit LSE inverse color transform specification marker */
+  if (cinfo->color_transform)
+    emit_lse_ict(cinfo);
+
+  /* Check to emit pseudo SOS marker */
+  if (cinfo->progressive_mode && cinfo->block_size != DCTSIZE)
+    emit_pseudo_sos(cinfo);
+}
+
+
+/*
+ * Write scan header.
+ * This consists of DHT or DAC markers, optional DRI, and SOS.
+ * Compressed data will be written following the SOS.
+ */
+
+METHODDEF(void)
+write_scan_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  int i;
+  jpeg_component_info *compptr;
+
+  if (cinfo->arith_code) {
+    /* Emit arith conditioning info.  We may have some duplication
+     * if the file has multiple scans, but it's so small it's hardly
+     * worth worrying about.
+     */
+    emit_dac(cinfo);
+  } else {
+    /* Emit Huffman tables.
+     * Note that emit_dht() suppresses any duplicate tables.
+     */
+    for (i = 0; i < cinfo->comps_in_scan; i++) {
+      compptr = cinfo->cur_comp_info[i];
+      /* DC needs no table for refinement scan */
+      if (cinfo->Ss == 0 && cinfo->Ah == 0)
+	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+      /* AC needs no table when not present */
+      if (cinfo->Se)
+	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+    }
+  }
+
+  /* Emit DRI if required --- note that DRI value could change for each scan.
+   * We avoid wasting space with unnecessary DRIs, however.
+   */
+  if (cinfo->restart_interval != marker->last_restart_interval) {
+    emit_dri(cinfo);
+    marker->last_restart_interval = cinfo->restart_interval;
+  }
+
+  emit_sos(cinfo);
+}
+
+
+/*
+ * Write datastream trailer.
+ */
+
+METHODDEF(void)
+write_file_trailer (j_compress_ptr cinfo)
+{
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Write an abbreviated table-specification datastream.
+ * This consists of SOI, DQT and DHT tables, and EOI.
+ * Any table that is defined and not marked sent_table = TRUE will be
+ * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
+ */
+
+METHODDEF(void)
+write_tables_only (j_compress_ptr cinfo)
+{
+  int i;
+
+  emit_marker(cinfo, M_SOI);
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if (cinfo->quant_tbl_ptrs[i] != NULL)
+      (void) emit_dqt(cinfo, i);
+  }
+
+  if (! cinfo->arith_code) {
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, FALSE);
+      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
+	emit_dht(cinfo, i, TRUE);
+    }
+  }
+
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Initialize the marker writer module.
+ */
+
+GLOBAL(void)
+jinit_marker_writer (j_compress_ptr cinfo)
+{
+  my_marker_ptr marker;
+
+  /* Create the subobject */
+  marker = (my_marker_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_marker_writer));
+  cinfo->marker = &marker->pub;
+  /* Initialize method pointers */
+  marker->pub.write_file_header = write_file_header;
+  marker->pub.write_frame_header = write_frame_header;
+  marker->pub.write_scan_header = write_scan_header;
+  marker->pub.write_file_trailer = write_file_trailer;
+  marker->pub.write_tables_only = write_tables_only;
+  marker->pub.write_marker_header = write_marker_header;
+  marker->pub.write_marker_byte = write_marker_byte;
+  /* Initialize private state */
+  marker->last_restart_interval = 0;
+}
diff --git a/cmake/externals/libjpeg/jcmaster.c b/cmake/externals/libjpeg/jcmaster.c
new file mode 100644
index 000000000..a70af0c02
--- /dev/null
+++ b/cmake/externals/libjpeg/jcmaster.c
@@ -0,0 +1,675 @@
+/*
+ * jcmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2003-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG compressor.
+ * These routines are concerned with parameter validation, initial setup,
+ * and inter-pass control (determining the number of passes and the work 
+ * to be done in each pass).
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef enum {
+	main_pass,		/* input data, also do first output step */
+	huff_opt_pass,		/* Huffman code optimization pass */
+	output_pass		/* data output pass */
+} c_pass_type;
+
+typedef struct {
+  struct jpeg_comp_master pub;	/* public fields */
+
+  c_pass_type pass_type;	/* the type of the current pass */
+
+  int pass_number;		/* # of passes completed */
+  int total_passes;		/* total # of passes needed */
+
+  int scan_number;		/* current index in scan_info[] */
+} my_comp_master;
+
+typedef my_comp_master * my_master_ptr;
+
+
+/*
+ * Support routines that do various essential calculations.
+ */
+
+LOCAL(void)
+initial_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+  int ci, ssize;
+  jpeg_component_info *compptr;
+
+  /* Sanity check on block_size */
+  if (cinfo->block_size < 1 || cinfo->block_size > 16)
+    ERREXIT2(cinfo, JERR_BAD_DCTSIZE, cinfo->block_size, cinfo->block_size);
+
+  /* Derive natural_order from block_size */
+  switch (cinfo->block_size) {
+  case 2: cinfo->natural_order = jpeg_natural_order2; break;
+  case 3: cinfo->natural_order = jpeg_natural_order3; break;
+  case 4: cinfo->natural_order = jpeg_natural_order4; break;
+  case 5: cinfo->natural_order = jpeg_natural_order5; break;
+  case 6: cinfo->natural_order = jpeg_natural_order6; break;
+  case 7: cinfo->natural_order = jpeg_natural_order7; break;
+  default: cinfo->natural_order = jpeg_natural_order;
+  }
+
+  /* Derive lim_Se from block_size */
+  cinfo->lim_Se = cinfo->block_size < DCTSIZE ?
+    cinfo->block_size * cinfo->block_size - 1 : DCTSIZE2-1;
+
+  /* Sanity check on image dimensions */
+  if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 ||
+      cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->jpeg_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
+  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Fill in the correct component_index value; don't rely on application */
+    compptr->component_index = ci;
+    /* In selecting the actual DCT scaling for each component, we try to
+     * scale down the chroma components via DCT scaling rather than downsampling.
+     * This saves time if the downsampler gets to use 1:1 scaling.
+     * Note this code adapts subsampling ratios which are powers of 2.
+     */
+    ssize = 1;
+#ifdef DCT_SCALING_SUPPORTED
+    if (! cinfo->raw_data_in)
+      while (cinfo->min_DCT_h_scaled_size * ssize <=
+	     (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
+	     (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) ==
+	     0) {
+	ssize = ssize * 2;
+      }
+#endif
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
+    ssize = 1;
+#ifdef DCT_SCALING_SUPPORTED
+    if (! cinfo->raw_data_in)
+      while (cinfo->min_DCT_v_scaled_size * ssize <=
+	     (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
+	     (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) ==
+	     0) {
+	ssize = ssize * 2;
+      }
+#endif
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
+
+    /* We don't support DCT ratios larger than 2. */
+    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
+	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
+    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
+	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
+
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    /* Don't need quantization scale after DCT,
+     * until color conversion says otherwise.
+     */
+    compptr->component_needed = FALSE;
+  }
+
+  /* Compute number of fully interleaved MCU rows (number of times that
+   * main controller will call coefficient controller).
+   */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->jpeg_height,
+		  (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+}
+
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+
+LOCAL(void)
+validate_script (j_compress_ptr cinfo)
+/* Verify that the scan script in cinfo->scan_info[] is valid; also
+ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
+ */
+{
+  const jpeg_scan_info * scanptr;
+  int scanno, ncomps, ci, coefi, thisi;
+  int Ss, Se, Ah, Al;
+  boolean component_sent[MAX_COMPONENTS];
+#ifdef C_PROGRESSIVE_SUPPORTED
+  int * last_bitpos_ptr;
+  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
+  /* -1 until that coefficient has been seen; then last Al for it */
+#endif
+
+  if (cinfo->num_scans <= 0)
+    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
+
+  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
+   * for progressive JPEG, no scan can have this.
+   */
+  scanptr = cinfo->scan_info;
+  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    cinfo->progressive_mode = TRUE;
+    last_bitpos_ptr = & last_bitpos[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      for (coefi = 0; coefi < DCTSIZE2; coefi++)
+	*last_bitpos_ptr++ = -1;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      component_sent[ci] = FALSE;
+  }
+
+  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
+    /* Validate component indexes */
+    ncomps = scanptr->comps_in_scan;
+    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
+    for (ci = 0; ci < ncomps; ci++) {
+      thisi = scanptr->component_index[ci];
+      if (thisi < 0 || thisi >= cinfo->num_components)
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+      /* Components must appear in SOF order within each scan */
+      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+    }
+    /* Validate progression parameters */
+    Ss = scanptr->Ss;
+    Se = scanptr->Se;
+    Ah = scanptr->Ah;
+    Al = scanptr->Al;
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
+       * seems wrong: the upper bound ought to depend on data precision.
+       * Perhaps they really meant 0..N+1 for N-bit precision.
+       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
+       * out-of-range reconstructed DC values during the first DC scan,
+       * which might cause problems for some decoders.
+       */
+      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
+	  Ah < 0 || Ah > (cinfo->data_precision > 8 ? 13 : 10) ||
+	  Al < 0 || Al > (cinfo->data_precision > 8 ? 13 : 10))
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      if (Ss == 0) {
+	if (Se != 0)		/* DC and AC together not OK */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else {
+	if (ncomps != 1)	/* AC scans must be for only one component */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
+      for (ci = 0; ci < ncomps; ci++) {
+	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
+	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	for (coefi = Ss; coefi <= Se; coefi++) {
+	  if (last_bitpos_ptr[coefi] < 0) {
+	    /* first scan of this coefficient */
+	    if (Ah != 0)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  } else {
+	    /* not first scan */
+	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  }
+	  last_bitpos_ptr[coefi] = Al;
+	}
+      }
+#endif
+    } else {
+      /* For sequential JPEG, all progression parameters must be these: */
+      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      /* Make sure components are not sent twice */
+      for (ci = 0; ci < ncomps; ci++) {
+	thisi = scanptr->component_index[ci];
+	if (component_sent[thisi])
+	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+	component_sent[thisi] = TRUE;
+      }
+    }
+  }
+
+  /* Now verify that everything got sent. */
+  if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    /* For progressive mode, we only check that at least some DC data
+     * got sent for each component; the spec does not require that all bits
+     * of all coefficients be transmitted.  Would it be wiser to enforce
+     * transmission of all coefficient bits??
+     */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (last_bitpos[ci][0] < 0)
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+#endif
+  } else {
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (! component_sent[ci])
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+  }
+}
+
+
+LOCAL(void)
+reduce_script (j_compress_ptr cinfo)
+/* Adapt scan script for use with reduced block size;
+ * assume that script has been validated before.
+ */
+{
+  jpeg_scan_info * scanptr;
+  int idxout, idxin;
+
+  /* Circumvent const declaration for this function */
+  scanptr = (jpeg_scan_info *) cinfo->scan_info;
+  idxout = 0;
+
+  for (idxin = 0; idxin < cinfo->num_scans; idxin++) {
+    /* After skipping, idxout becomes smaller than idxin */
+    if (idxin != idxout)
+      /* Copy rest of data;
+       * note we stay in given chunk of allocated memory.
+       */
+      scanptr[idxout] = scanptr[idxin];
+    if (scanptr[idxout].Ss > cinfo->lim_Se)
+      /* Entire scan out of range - skip this entry */
+      continue;
+    if (scanptr[idxout].Se > cinfo->lim_Se)
+      /* Limit scan to end of block */
+      scanptr[idxout].Se = cinfo->lim_Se;
+    idxout++;
+  }
+
+  cinfo->num_scans = idxout;
+}
+
+#endif /* C_MULTISCAN_FILES_SUPPORTED */
+
+
+LOCAL(void)
+select_scan_parameters (j_compress_ptr cinfo)
+/* Set up the scan parameters for the current scan */
+{
+  int ci;
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+  if (cinfo->scan_info != NULL) {
+    /* Prepare for current scan --- the script is already validated */
+    my_master_ptr master = (my_master_ptr) cinfo->master;
+    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
+
+    cinfo->comps_in_scan = scanptr->comps_in_scan;
+    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
+      cinfo->cur_comp_info[ci] =
+	&cinfo->comp_info[scanptr->component_index[ci]];
+    }
+    if (cinfo->progressive_mode) {
+      cinfo->Ss = scanptr->Ss;
+      cinfo->Se = scanptr->Se;
+      cinfo->Ah = scanptr->Ah;
+      cinfo->Al = scanptr->Al;
+      return;
+    }
+  }
+  else
+#endif
+  {
+    /* Prepare for single sequential-JPEG scan containing all components */
+    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPS_IN_SCAN);
+    cinfo->comps_in_scan = cinfo->num_components;
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
+    }
+  }
+  cinfo->Ss = 0;
+  cinfo->Se = cinfo->block_size * cinfo->block_size - 1;
+  cinfo->Ah = 0;
+  cinfo->Al = 0;
+}
+
+
+LOCAL(void)
+per_scan_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+
+  if (cinfo->comps_in_scan == 1) {
+
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+
+  } else {
+
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->jpeg_width,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
+
+    cinfo->blocks_in_MCU = 0;
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+
+  }
+
+  /* Convert restart specified in rows to actual MCU count. */
+  /* Note that count must fit in 16 bits, so we provide limiting. */
+  if (cinfo->restart_in_rows > 0) {
+    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
+    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
+  }
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each pass.  We determine which modules
+ * will be active during this pass and give them appropriate start_pass calls.
+ * We also set is_last_pass to indicate whether any more passes will be
+ * required.
+ */
+
+METHODDEF(void)
+prepare_for_pass (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  switch (master->pass_type) {
+  case main_pass:
+    /* Initial pass: will collect input data, and do either Huffman
+     * optimization or data output for the first scan.
+     */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (! cinfo->raw_data_in) {
+      (*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->downsample->start_pass) (cinfo);
+      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+    (*cinfo->fdct->start_pass) (cinfo);
+    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
+    (*cinfo->coef->start_pass) (cinfo,
+				(master->total_passes > 1 ?
+				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    if (cinfo->optimize_coding) {
+      /* No immediate data output; postpone writing frame/scan headers */
+      master->pub.call_pass_startup = FALSE;
+    } else {
+      /* Will write frame/scan headers at first jpeg_write_scanlines call */
+      master->pub.call_pass_startup = TRUE;
+    }
+    break;
+#ifdef ENTROPY_OPT_SUPPORTED
+  case huff_opt_pass:
+    /* Do Huffman optimization for a scan after the first one. */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (cinfo->Ss != 0 || cinfo->Ah == 0) {
+      (*cinfo->entropy->start_pass) (cinfo, TRUE);
+      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+      master->pub.call_pass_startup = FALSE;
+      break;
+    }
+    /* Special case: Huffman DC refinement scans need no Huffman table
+     * and therefore we can skip the optimization pass for them.
+     */
+    master->pass_type = output_pass;
+    master->pass_number++;
+    /*FALLTHROUGH*/
+#endif
+  case output_pass:
+    /* Do a data-output pass. */
+    /* We need not repeat per-scan setup if prior optimization pass did it. */
+    if (! cinfo->optimize_coding) {
+      select_scan_parameters(cinfo);
+      per_scan_setup(cinfo);
+    }
+    (*cinfo->entropy->start_pass) (cinfo, FALSE);
+    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+    /* We emit frame/scan headers now */
+    if (master->scan_number == 0)
+      (*cinfo->marker->write_frame_header) (cinfo);
+    (*cinfo->marker->write_scan_header) (cinfo);
+    master->pub.call_pass_startup = FALSE;
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+  }
+
+  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->total_passes;
+  }
+}
+
+
+/*
+ * Special start-of-pass hook.
+ * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
+ * In single-pass processing, we need this hook because we don't want to
+ * write frame/scan headers during jpeg_start_compress; we want to let the
+ * application write COM markers etc. between jpeg_start_compress and the
+ * jpeg_write_scanlines loop.
+ * In multi-pass processing, this routine is not used.
+ */
+
+METHODDEF(void)
+pass_startup (j_compress_ptr cinfo)
+{
+  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
+
+  (*cinfo->marker->write_frame_header) (cinfo);
+  (*cinfo->marker->write_scan_header) (cinfo);
+}
+
+
+/*
+ * Finish up at end of pass.
+ */
+
+METHODDEF(void)
+finish_pass_master (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* The entropy coder always needs an end-of-pass call,
+   * either to analyze statistics or to flush its output buffer.
+   */
+  (*cinfo->entropy->finish_pass) (cinfo);
+
+  /* Update state for next pass */
+  switch (master->pass_type) {
+  case main_pass:
+    /* next pass is either output of scan 0 (after optimization)
+     * or output of scan 1 (if no optimization).
+     */
+    master->pass_type = output_pass;
+    if (! cinfo->optimize_coding)
+      master->scan_number++;
+    break;
+  case huff_opt_pass:
+    /* next pass is always output of current scan */
+    master->pass_type = output_pass;
+    break;
+  case output_pass:
+    /* next pass is either optimization or output of next scan */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    master->scan_number++;
+    break;
+  }
+
+  master->pass_number++;
+}
+
+
+/*
+ * Initialize master compression control.
+ */
+
+GLOBAL(void)
+jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_comp_master));
+  cinfo->master = &master->pub;
+  master->pub.prepare_for_pass = prepare_for_pass;
+  master->pub.pass_startup = pass_startup;
+  master->pub.finish_pass = finish_pass_master;
+  master->pub.is_last_pass = FALSE;
+
+  /* Validate parameters, determine derived values */
+  initial_setup(cinfo);
+
+  if (cinfo->scan_info != NULL) {
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+    validate_script(cinfo);
+    if (cinfo->block_size < DCTSIZE)
+      reduce_script(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    cinfo->num_scans = 1;
+  }
+
+  if (cinfo->optimize_coding)
+    cinfo->arith_code = FALSE; /* disable arithmetic coding */
+  else if (! cinfo->arith_code &&
+	   (cinfo->progressive_mode ||
+	    (cinfo->block_size > 1 && cinfo->block_size < DCTSIZE)))
+    /* TEMPORARY HACK ??? */
+    /* assume default tables no good for progressive or reduced AC mode */
+    cinfo->optimize_coding = TRUE; /* force Huffman optimization */
+
+  /* Initialize my private state */
+  if (transcode_only) {
+    /* no main pass in transcoding */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    else
+      master->pass_type = output_pass;
+  } else {
+    /* for normal compression, first pass is always this type: */
+    master->pass_type = main_pass;
+  }
+  master->scan_number = 0;
+  master->pass_number = 0;
+  if (cinfo->optimize_coding)
+    master->total_passes = cinfo->num_scans * 2;
+  else
+    master->total_passes = cinfo->num_scans;
+}
diff --git a/cmake/externals/libjpeg/jcomapi.c b/cmake/externals/libjpeg/jcomapi.c
new file mode 100644
index 000000000..678c5d12d
--- /dev/null
+++ b/cmake/externals/libjpeg/jcomapi.c
@@ -0,0 +1,244 @@
+/*
+ * jcomapi.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2019 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface routines that are used for both
+ * compression and decompression.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Abort processing of a JPEG compression or decompression operation,
+ * but don't destroy the object itself.
+ *
+ * For this, we merely clean up all the nonpermanent memory pools.
+ * Note that temp files (virtual arrays) are not allowed to belong to
+ * the permanent pool, so we will be able to close all temp files here.
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_abort (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
+  if (cinfo->mem == NULL)
+    return;
+
+  /* Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) {
+    (*cinfo->mem->free_pool) (cinfo, pool);
+  }
+
+  /* Reset overall state for possible reuse of object */
+  if (cinfo->is_decompressor) {
+    cinfo->global_state = DSTATE_START;
+    /* Try to keep application from accessing now-deleted marker list.
+     * A bit kludgy to do it here, but this is the most central place.
+     */
+    ((j_decompress_ptr) cinfo)->marker_list = NULL;
+  } else {
+    cinfo->global_state = CSTATE_START;
+  }
+}
+
+
+/*
+ * Destruction of a JPEG object.
+ *
+ * Everything gets deallocated except the master jpeg_compress_struct itself
+ * and the error manager struct.  Both of these are supplied by the application
+ * and must be freed, if necessary, by the application.  (Often they are on
+ * the stack and so don't need to be freed anyway.)
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_destroy (j_common_ptr cinfo)
+{
+  /* We need only tell the memory manager to release everything. */
+  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
+  if (cinfo->mem != NULL)
+    (*cinfo->mem->self_destruct) (cinfo);
+  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;	/* mark it destroyed */
+}
+
+
+/*
+ * Convenience routines for allocating quantization and Huffman tables.
+ * (Would jutils.c be a more reasonable place to put these?)
+ */
+
+GLOBAL(JQUANT_TBL *)
+jpeg_alloc_quant_table (j_common_ptr cinfo)
+{
+  JQUANT_TBL *tbl;
+
+  tbl = (JQUANT_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;
+}
+
+
+GLOBAL(JHUFF_TBL *)
+jpeg_alloc_huff_table (j_common_ptr cinfo)
+{
+  JHUFF_TBL *tbl;
+
+  tbl = (JHUFF_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
+  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+  return tbl;
+}
+
+
+/*
+ * Set up the standard Huffman tables (cf. JPEG standard section K.3).
+ * IMPORTANT: these are only valid for 8-bit data precision!
+ * (Would jutils.c be a more reasonable place to put this?)
+ */
+
+GLOBAL(JHUFF_TBL *)
+jpeg_std_huff_table (j_common_ptr cinfo, boolean isDC, int tblno)
+{
+  JHUFF_TBL **htblptr, *htbl;
+  const UINT8 *bits, *val;
+  int nsymbols, len;
+
+  static const UINT8 bits_dc_luminance[17] =
+    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_luminance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+  static const UINT8 bits_dc_chrominance[17] =
+    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_chrominance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+  static const UINT8 bits_ac_luminance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+  static const UINT8 val_ac_luminance[] =
+    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+
+  static const UINT8 bits_ac_chrominance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+  static const UINT8 val_ac_chrominance[] =
+    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+
+  if (cinfo->is_decompressor) {
+    if (isDC)
+      htblptr = ((j_decompress_ptr) cinfo)->dc_huff_tbl_ptrs;
+    else
+      htblptr = ((j_decompress_ptr) cinfo)->ac_huff_tbl_ptrs;
+  } else {
+    if (isDC)
+      htblptr = ((j_compress_ptr) cinfo)->dc_huff_tbl_ptrs;
+    else
+      htblptr = ((j_compress_ptr) cinfo)->ac_huff_tbl_ptrs;
+  }
+
+  switch (tblno) {
+  case 0:
+    if (isDC) {
+      bits = bits_dc_luminance;
+      val = val_dc_luminance;
+    } else {
+      bits = bits_ac_luminance;
+      val = val_ac_luminance;
+    }
+    break;
+  case 1:
+    if (isDC) {
+      bits = bits_dc_chrominance;
+      val = val_dc_chrominance;
+    } else {
+      bits = bits_ac_chrominance;
+      val = val_ac_chrominance;
+    }
+    break;
+  default:
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+    return NULL; /* avoid compiler warnings for uninitialized variables */
+  }
+
+  if (htblptr[tblno] == NULL)
+    htblptr[tblno] = jpeg_alloc_huff_table(cinfo);
+
+  htbl = htblptr[tblno];
+
+  /* Copy the number-of-symbols-of-each-code-length counts */
+  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
+
+  /* Validate the counts.  We do this here mainly so we can copy the right
+   * number of symbols from the val[] array, without risking marching off
+   * the end of memory.  jxhuff.c will do a more thorough test later.
+   */
+  nsymbols = 0;
+  for (len = 1; len <= 16; len++)
+    nsymbols += bits[len];
+  if (nsymbols > 256)
+    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+  if (nsymbols > 0)
+    MEMCOPY(htbl->huffval, val, nsymbols * SIZEOF(UINT8));
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  htbl->sent_table = FALSE;
+
+  return htbl;
+}
diff --git a/cmake/externals/libjpeg/jconfig.h b/cmake/externals/libjpeg/jconfig.h
new file mode 100644
index 000000000..8d6c8f5b3
--- /dev/null
+++ b/cmake/externals/libjpeg/jconfig.h
@@ -0,0 +1,64 @@
+/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 9x or NT.
+ * This file also works for Borland/Embarcadero C++ for Win32 or Win64
+ * (CLI: bcc32, bcc32c, bcc32x, bcc64; GUI IDE: C++Builder/RAD Studio).
+ * See jconfig.txt for explanations.
+ */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef CHAR_IS_UNSIGNED
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS	/* we presume a 32-bit flat memory model */
+#undef NEED_SHORT_EXTERNAL_NAMES
+#undef INCOMPLETE_TYPES_BROKEN
+
+/* Define "boolean" as unsigned char, not enum, per Windows custom */
+#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
+#endif
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
+#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
+
+/* Define custom RGB color order, prevent jmorecfg.h from redefinition */
+#undef JPEG_HAVE_RGB_CUSTOM
+/* Use Windows custom BGR color order defined in jmorecfg.h */
+#undef JPEG_USE_RGB_CUSTOM
+
+/* Define custom file I/O functions, prevent jinclude.h from redefinition */
+#undef JPEG_HAVE_FILE_IO_CUSTOM
+/* Use Delphi custom file I/O functions defined in jinclude.h */
+#undef JPEG_USE_FILE_IO_CUSTOM
+
+
+#ifdef JPEG_INTERNALS
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
+
+#ifdef JPEG_CJPEG_DJPEG
+
+#define BMP_SUPPORTED		/* BMP image file format */
+#define GIF_SUPPORTED		/* GIF image file format */
+#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED		/* Utah RLE image file format */
+#define TARGA_SUPPORTED		/* Targa image file format */
+
+#define TWO_FILE_COMMANDLINE	/* optional */
+#define USE_SETMODE	/* Microsoft/Borland/Embarcadero have setmode() */
+#undef NEED_SIGNAL_CATCHER
+#undef DONT_USE_B_MODE
+#undef PROGRESS_REPORT		/* optional */
+
+#endif /* JPEG_CJPEG_DJPEG */
diff --git a/cmake/externals/libjpeg/jcparam.c b/cmake/externals/libjpeg/jcparam.c
new file mode 100644
index 000000000..261ae86ca
--- /dev/null
+++ b/cmake/externals/libjpeg/jcparam.c
@@ -0,0 +1,591 @@
+/*
+ * jcparam.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2003-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains optional default-setting code for the JPEG compressor.
+ * Applications do not have to use this file, but those that don't use it
+ * must know a lot more about the innards of the JPEG code.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Quantization table setup routines
+ */
+
+GLOBAL(void)
+jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
+		      const unsigned int *basic_table,
+		      int scale_factor, boolean force_baseline)
+/* Define a quantization table equal to the basic_table times
+ * a scale factor (given as a percentage).
+ * If force_baseline is TRUE, the computed quantization table entries
+ * are limited to 1..255 for JPEG baseline compatibility.
+ */
+{
+  JQUANT_TBL ** qtblptr;
+  int i;
+  long temp;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
+    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
+
+  qtblptr = & cinfo->quant_tbl_ptrs[which_tbl];
+
+  if (*qtblptr == NULL)
+    *qtblptr = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    temp = ((long) basic_table[i] * scale_factor + 50L) / 100L;
+    /* limit the values to the valid range */
+    if (temp <= 0L) temp = 1L;
+    if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
+    if (force_baseline && temp > 255L)
+      temp = 255L;		/* limit to baseline range if requested */
+    (*qtblptr)->quantval[i] = (UINT16) temp;
+  }
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*qtblptr)->sent_table = FALSE;
+}
+
+
+/* These are the sample quantization tables given in JPEG spec section K.1.
+ * NOTE: chrominance DC value is changed from 17 to 16 for lossless support.
+ * The spec says that the values given produce "good" quality,
+ * and when divided by 2, "very good" quality.
+ */
+static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+  16,  11,  10,  16,  24,  40,  51,  61,
+  12,  12,  14,  19,  26,  58,  60,  55,
+  14,  13,  16,  24,  40,  57,  69,  56,
+  14,  17,  22,  29,  51,  87,  80,  62,
+  18,  22,  37,  56,  68, 109, 103,  77,
+  24,  35,  55,  64,  81, 104, 113,  92,
+  49,  64,  78,  87, 103, 121, 120, 101,
+  72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+  16,  18,  24,  47,  99,  99,  99,  99,
+  18,  21,  26,  66,  99,  99,  99,  99,
+  24,  26,  56,  99,  99,  99,  99,  99,
+  47,  66,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99
+};
+
+
+GLOBAL(void)
+jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables
+ * and straight percentage-scaling quality scales.
+ * This entry point allows different scalings for luminance and chrominance.
+ */
+{
+  /* Set up two quantization tables using the specified scaling */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+		       cinfo->q_scale_factor[0], force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+		       cinfo->q_scale_factor[1], force_baseline);
+}
+
+
+GLOBAL(void)
+jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
+			 boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables
+ * and a straight percentage-scaling quality scale.  In most cases it's better
+ * to use jpeg_set_quality (below); this entry point is provided for
+ * applications that insist on a linear percentage scaling.
+ */
+{
+  /* Set up two quantization tables using the specified scaling */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+		       scale_factor, force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+		       scale_factor, force_baseline);
+}
+
+
+GLOBAL(int)
+jpeg_quality_scaling (int quality)
+/* Convert a user-specified quality rating to a percentage scaling factor
+ * for an underlying quantization table, using our recommended scaling curve.
+ * The input 'quality' factor should be 0 (terrible) to 100 (very good).
+ */
+{
+  /* Safety limit on quality factor.  Convert 0 to 1 to avoid zero divide. */
+  if (quality <= 0) quality = 1;
+  if (quality > 100) quality = 100;
+
+  /* The basic table is used as-is (scaling 100) for a quality of 50.
+   * Qualities 50..100 are converted to scaling percentage 200 - 2*Q;
+   * note that at Q=100 the scaling is 0, which will cause jpeg_add_quant_table
+   * to make all the table entries 1 (hence, minimum quantization loss).
+   * Qualities 1..50 are converted to scaling percentage 5000/Q.
+   */
+  if (quality < 50)
+    quality = 5000 / quality;
+  else
+    quality = 200 - quality*2;
+
+  return quality;
+}
+
+
+GLOBAL(void)
+jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables.
+ * This is the standard quality-adjusting entry point for typical user
+ * interfaces; only those who want detailed control over quantization tables
+ * would use the preceding routines directly.
+ */
+{
+  /* Convert user 0-100 rating to percentage scaling */
+  quality = jpeg_quality_scaling(quality);
+
+  /* Set up standard quality tables */
+  jpeg_set_linear_quality(cinfo, quality, force_baseline);
+}
+
+
+/*
+ * Reset standard Huffman tables
+ */
+
+LOCAL(void)
+std_huff_tables (j_compress_ptr cinfo)
+{
+  if (cinfo->dc_huff_tbl_ptrs[0] != NULL)
+    (void) jpeg_std_huff_table((j_common_ptr) cinfo, TRUE, 0);
+
+  if (cinfo->ac_huff_tbl_ptrs[0] != NULL)
+    (void) jpeg_std_huff_table((j_common_ptr) cinfo, FALSE, 0);
+
+  if (cinfo->dc_huff_tbl_ptrs[1] != NULL)
+    (void) jpeg_std_huff_table((j_common_ptr) cinfo, TRUE, 1);
+
+  if (cinfo->ac_huff_tbl_ptrs[1] != NULL)
+    (void) jpeg_std_huff_table((j_common_ptr) cinfo, FALSE, 1);
+}
+
+
+/*
+ * Default parameter setup for compression.
+ *
+ * Applications that don't choose to use this routine must do their
+ * own setup of all these parameters.  Alternately, you can call this
+ * to establish defaults and then alter parameters selectively.  This
+ * is the recommended approach since, if we add any new parameters,
+ * your code will still work (they'll be set to reasonable defaults).
+ */
+
+GLOBAL(void)
+jpeg_set_defaults (j_compress_ptr cinfo)
+{
+  int i;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Allocate comp_info array large enough for maximum component count.
+   * Array is made permanent in case application wants to compress
+   * multiple images at same param settings.
+   */
+  if (cinfo->comp_info == NULL)
+    cinfo->comp_info = (jpeg_component_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
+
+  /* Initialize everything not dependent on the color space */
+
+  cinfo->scale_num = 1;		/* 1:1 scaling */
+  cinfo->scale_denom = 1;
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+  /* Set up two quantization tables using default quality of 75 */
+  jpeg_set_quality(cinfo, 75, TRUE);
+  /* Reset standard Huffman tables */
+  std_huff_tables(cinfo);
+
+  /* Initialize default arithmetic coding conditioning */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+
+  /* Default is no multiple-scan output */
+  cinfo->scan_info = NULL;
+  cinfo->num_scans = 0;
+
+  /* Expect normal source image, not raw downsampled data */
+  cinfo->raw_data_in = FALSE;
+
+  /* The standard Huffman tables are only valid for 8-bit data precision.
+   * If the precision is higher, use arithmetic coding.
+   * (Alternatively, using Huffman coding would be possible with forcing
+   * optimization on so that usable tables will be computed, or by
+   * supplying default tables that are valid for the desired precision.)
+   * Otherwise, use Huffman coding by default.
+   */
+  cinfo->arith_code = cinfo->data_precision > 8 ? TRUE : FALSE;
+
+  /* By default, don't do extra passes to optimize entropy coding */
+  cinfo->optimize_coding = FALSE;
+
+  /* By default, use the simpler non-cosited sampling alignment */
+  cinfo->CCIR601_sampling = FALSE;
+
+  /* By default, apply fancy downsampling */
+  cinfo->do_fancy_downsampling = TRUE;
+
+  /* No input smoothing */
+  cinfo->smoothing_factor = 0;
+
+  /* DCT algorithm preference */
+  cinfo->dct_method = JDCT_DEFAULT;
+
+  /* No restart markers */
+  cinfo->restart_interval = 0;
+  cinfo->restart_in_rows = 0;
+
+  /* Fill in default JFIF marker parameters.  Note that whether the marker
+   * will actually be written is determined by jpeg_set_colorspace.
+   *
+   * By default, the library emits JFIF version code 1.01.
+   * An application that wants to emit JFIF 1.02 extension markers should set
+   * JFIF_minor_version to 2.  We could probably get away with just defaulting
+   * to 1.02, but there may still be some decoders in use that will complain
+   * about that; saying 1.01 should minimize compatibility problems.
+   *
+   * For wide gamut colorspaces (BG_RGB and BG_YCC), the major version will be
+   * overridden by jpeg_set_colorspace and set to 2.
+   */
+  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
+  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
+  cinfo->Y_density = 1;
+
+  /* No color transform */
+  cinfo->color_transform = JCT_NONE;
+
+  /* Choose JPEG colorspace based on input space, set defaults accordingly */
+
+  jpeg_default_colorspace(cinfo);
+}
+
+
+/*
+ * Select an appropriate JPEG colorspace for in_color_space.
+ */
+
+GLOBAL(void)
+jpeg_default_colorspace (j_compress_ptr cinfo)
+{
+  switch (cinfo->in_color_space) {
+  case JCS_UNKNOWN:
+    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+    break;
+  case JCS_GRAYSCALE:
+    jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
+    break;
+  case JCS_RGB:
+    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_YCbCr:
+    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_CMYK:
+    jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
+    break;
+  case JCS_YCCK:
+    jpeg_set_colorspace(cinfo, JCS_YCCK);
+    break;
+  case JCS_BG_RGB:
+    /* No translation for now -- conversion to BG_YCC not yet supportet */
+    jpeg_set_colorspace(cinfo, JCS_BG_RGB);
+    break;
+  case JCS_BG_YCC:
+    jpeg_set_colorspace(cinfo, JCS_BG_YCC);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+  }
+}
+
+
+/*
+ * Set the JPEG colorspace, and choose colorspace-dependent default values.
+ */
+
+GLOBAL(void)
+jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
+{
+  jpeg_component_info * compptr;
+  int ci;
+
+#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
+  (compptr = &cinfo->comp_info[index], \
+   compptr->component_id = (id), \
+   compptr->h_samp_factor = (hsamp), \
+   compptr->v_samp_factor = (vsamp), \
+   compptr->quant_tbl_no = (quant), \
+   compptr->dc_tbl_no = (dctbl), \
+   compptr->ac_tbl_no = (actbl) )
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
+   * tables 1 for chrominance components.
+   */
+
+  cinfo->jpeg_color_space = colorspace;
+
+  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
+  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
+
+  switch (colorspace) {
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1,1, 0, 0,0);
+    }
+    break;
+  case JCS_GRAYSCALE:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 1;
+    /* JFIF specifies component ID 1 */
+    SET_COMP(0, 0x01, 1,1, 0, 0,0);
+    break;
+  case JCS_RGB:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
+    cinfo->num_components = 3;
+    SET_COMP(0, 0x52 /* 'R' */, 1,1,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x42 /* 'B' */, 1,1,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    break;
+  case JCS_YCbCr:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 3;
+    /* JFIF specifies component IDs 1,2,3 */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x02, 1,1, 1, 1,1);
+    SET_COMP(2, 0x03, 1,1, 1, 1,1);
+    break;
+  case JCS_CMYK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
+    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCCK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x02, 1,1, 1, 1,1);
+    SET_COMP(2, 0x03, 1,1, 1, 1,1);
+    SET_COMP(3, 0x04, 2,2, 0, 0,0);
+    break;
+  case JCS_BG_RGB:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
+    cinfo->num_components = 3;
+    /* Add offset 0x20 to the normal R/G/B component IDs */
+    SET_COMP(0, 0x72 /* 'r' */, 1,1,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    SET_COMP(1, 0x67 /* 'g' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x62 /* 'b' */, 1,1,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    break;
+  case JCS_BG_YCC:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
+    cinfo->num_components = 3;
+    /* Add offset 0x20 to the normal Cb/Cr component IDs */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x22, 1,1, 1, 1,1);
+    SET_COMP(2, 0x23, 1,1, 1, 1,1);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+  }
+}
+
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+LOCAL(jpeg_scan_info *)
+fill_a_scan (jpeg_scan_info * scanptr, int ci,
+	     int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for specified component */
+{
+  scanptr->comps_in_scan = 1;
+  scanptr->component_index[0] = ci;
+  scanptr->Ss = Ss;
+  scanptr->Se = Se;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+  scanptr++;
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_scans (jpeg_scan_info * scanptr, int ncomps,
+	    int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for each component */
+{
+  int ci;
+
+  for (ci = 0; ci < ncomps; ci++) {
+    scanptr->comps_in_scan = 1;
+    scanptr->component_index[0] = ci;
+    scanptr->Ss = Ss;
+    scanptr->Se = Se;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  }
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
+/* Support routine: generate interleaved DC scan if possible, else N scans */
+{
+  int ci;
+
+  if (ncomps <= MAX_COMPS_IN_SCAN) {
+    /* Single interleaved DC scan */
+    scanptr->comps_in_scan = ncomps;
+    for (ci = 0; ci < ncomps; ci++)
+      scanptr->component_index[ci] = ci;
+    scanptr->Ss = scanptr->Se = 0;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  } else {
+    /* Noninterleaved DC scan for each component */
+    scanptr = fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
+  }
+  return scanptr;
+}
+
+
+/*
+ * Create a recommended progressive-JPEG script.
+ * cinfo->num_components and cinfo->jpeg_color_space must be correct.
+ */
+
+GLOBAL(void)
+jpeg_simple_progression (j_compress_ptr cinfo)
+{
+  int ncomps = cinfo->num_components;
+  int nscans;
+  jpeg_scan_info * scanptr;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Figure space needed for script.  Calculation must match code below! */
+  if (ncomps == 3 &&
+      (cinfo->jpeg_color_space == JCS_YCbCr ||
+       cinfo->jpeg_color_space == JCS_BG_YCC)) {
+    /* Custom script for YCC color images. */
+    nscans = 10;
+  } else {
+    /* All-purpose script for other color spaces. */
+    if (ncomps > MAX_COMPS_IN_SCAN)
+      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
+    else
+      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
+  }
+
+  /* Allocate space for script.
+   * We need to put it in the permanent pool in case the application performs
+   * multiple compressions without changing the settings.  To avoid a memory
+   * leak if jpeg_simple_progression is called repeatedly for the same JPEG
+   * object, we try to re-use previously allocated space, and we allocate
+   * enough space to handle YCC even if initially asked for grayscale.
+   */
+  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
+    cinfo->script_space_size = MAX(nscans, 10);
+    cinfo->script_space = (jpeg_scan_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
+  }
+  scanptr = cinfo->script_space;
+  cinfo->scan_info = scanptr;
+  cinfo->num_scans = nscans;
+
+  if (ncomps == 3 &&
+      (cinfo->jpeg_color_space == JCS_YCbCr ||
+       cinfo->jpeg_color_space == JCS_BG_YCC)) {
+    /* Custom script for YCC color images. */
+    /* Initial DC scan */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    /* Initial AC scan: get some luma data out in a hurry */
+    scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
+    /* Chroma data is too small to be worth expending many scans on */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
+    /* Complete spectral selection for luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);
+    /* Refine next bit of luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);
+    /* Finish DC successive approximation */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    /* Finish AC successive approximation */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
+    /* Luma bottom bit comes last since it's usually largest scan */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
+  } else {
+    /* All-purpose script for other color spaces. */
+    /* Successive approximation first pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
+    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
+    /* Successive approximation second pass */
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
+    /* Successive approximation final pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
+  }
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jcprepct.c b/cmake/externals/libjpeg/jcprepct.c
new file mode 100644
index 000000000..586964bd4
--- /dev/null
+++ b/cmake/externals/libjpeg/jcprepct.c
@@ -0,0 +1,358 @@
+/*
+ * jcprepct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the compression preprocessing controller.
+ * This controller manages the color conversion, downsampling,
+ * and edge expansion steps.
+ *
+ * Most of the complexity here is associated with buffering input rows
+ * as required by the downsampler.  See the comments at the head of
+ * jcsample.c for the downsampler's needs.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* At present, jcsample.c can request context rows only for smoothing.
+ * In the future, we might also need context rows for CCIR601 sampling
+ * or other more-complex downsampling procedures.  The code to support
+ * context rows should be compiled only if needed.
+ */
+#ifdef INPUT_SMOOTHING_SUPPORTED
+#define CONTEXT_ROWS_SUPPORTED
+#endif
+
+
+/*
+ * For the simple (no-context-row) case, we just need to buffer one
+ * row group's worth of pixels for the downsampling step.  At the bottom of
+ * the image, we pad to a full row group by replicating the last pixel row.
+ * The downsampler's last output row is then replicated if needed to pad
+ * out to a full iMCU row.
+ *
+ * When providing context rows, we must buffer three row groups' worth of
+ * pixels.  Three row groups are physically allocated, but the row pointer
+ * arrays are made five row groups high, with the extra pointers above and
+ * below "wrapping around" to point to the last and first real row groups.
+ * This allows the downsampler to access the proper context rows.
+ * At the top and bottom of the image, we create dummy context rows by
+ * copying the first or last real pixel row.  This copying could be avoided
+ * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
+ * trouble on the compression side.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_prep_controller pub; /* public fields */
+
+  /* Downsampling input buffer.  This buffer holds color-converted data
+   * until we have enough to do a downsample step.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
+  int next_buf_row;		/* index of next row to store in color_buf */
+
+#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
+  int this_row_group;		/* starting row index of group to process */
+  int next_buf_stop;		/* downsample when we reach this index */
+#endif
+} my_prep_controller;
+
+typedef my_prep_controller * my_prep_ptr;
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+
+  if (pass_mode != JBUF_PASS_THRU)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Initialize total-height counter for detecting bottom of image */
+  prep->rows_to_go = cinfo->image_height;
+  /* Mark the conversion buffer empty */
+  prep->next_buf_row = 0;
+#ifdef CONTEXT_ROWS_SUPPORTED
+  /* Preset additional state variables for context mode.
+   * These aren't used in non-context mode, so we needn't test which mode.
+   */
+  prep->this_row_group = 0;
+  /* Set next_buf_stop to stop after two row groups have been read in. */
+  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
+#endif
+}
+
+
+/*
+ * Expand an image vertically from height input_rows to height output_rows,
+ * by duplicating the bottom row.
+ */
+
+LOCAL(void)
+expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
+		    int input_rows, int output_rows)
+{
+  register int row;
+
+  for (row = input_rows; row < output_rows; row++) {
+    jcopy_sample_rows(image_data + input_rows - 1,
+		      image_data + row,
+		      1, num_cols);
+  }
+}
+
+
+/*
+ * Process some data in the simple no-context case.
+ *
+ * Preprocessor output data is counted in "row groups".  A row group
+ * is defined to be v_samp_factor sample rows of each component.
+ * Downsampling will produce this much data from each max_v_samp_factor
+ * input rows.
+ */
+
+METHODDEF(void)
+pre_process_data (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		  JDIMENSION in_rows_avail,
+		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		  JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  JDIMENSION inrows;
+  jpeg_component_info * compptr;
+
+  while (*in_row_ctr < in_rows_avail &&
+	 *out_row_group_ctr < out_row_groups_avail) {
+    /* Do color conversion to fill the conversion buffer. */
+    inrows = in_rows_avail - *in_row_ctr;
+    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
+    numrows = (int) MIN((JDIMENSION) numrows, inrows);
+    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+				       prep->color_buf,
+				       (JDIMENSION) prep->next_buf_row,
+				       numrows);
+    *in_row_ctr += numrows;
+    prep->next_buf_row += numrows;
+    prep->rows_to_go -= numrows;
+    /* If at bottom of image, pad to fill the conversion buffer. */
+    if (prep->rows_to_go == 0 &&
+	prep->next_buf_row < cinfo->max_v_samp_factor) {
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			   prep->next_buf_row, cinfo->max_v_samp_factor);
+      }
+      prep->next_buf_row = cinfo->max_v_samp_factor;
+    }
+    /* If we've filled the conversion buffer, empty it. */
+    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf, (JDIMENSION) 0,
+					output_buf, *out_row_group_ctr);
+      prep->next_buf_row = 0;
+      (*out_row_group_ctr)++;
+    }
+    /* If at bottom of image, pad the output to a full iMCU height.
+     * Note we assume the caller is providing a one-iMCU-height output buffer!
+     */
+    if (prep->rows_to_go == 0 &&
+	*out_row_group_ctr < out_row_groups_avail) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	numrows = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+		  cinfo->min_DCT_v_scaled_size;
+	expand_bottom_edge(output_buf[ci],
+			   compptr->width_in_blocks * compptr->DCT_h_scaled_size,
+			   (int) (*out_row_group_ctr * numrows),
+			   (int) (out_row_groups_avail * numrows));
+      }
+      *out_row_group_ctr = out_row_groups_avail;
+      break;			/* can exit outer loop without test */
+    }
+  }
+}
+
+
+#ifdef CONTEXT_ROWS_SUPPORTED
+
+/*
+ * Process some data in the context case.
+ */
+
+METHODDEF(void)
+pre_process_context (j_compress_ptr cinfo,
+		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		     JDIMENSION in_rows_avail,
+		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		     JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  int buf_height = cinfo->max_v_samp_factor * 3;
+  JDIMENSION inrows;
+
+  while (*out_row_group_ctr < out_row_groups_avail) {
+    if (*in_row_ctr < in_rows_avail) {
+      /* Do color conversion to fill the conversion buffer. */
+      inrows = in_rows_avail - *in_row_ctr;
+      numrows = prep->next_buf_stop - prep->next_buf_row;
+      numrows = (int) MIN((JDIMENSION) numrows, inrows);
+      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+					 prep->color_buf,
+					 (JDIMENSION) prep->next_buf_row,
+					 numrows);
+      /* Pad at top of image, if first time through */
+      if (prep->rows_to_go == cinfo->image_height) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  int row;
+	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+	    jcopy_sample_rows(prep->color_buf[ci],
+			      prep->color_buf[ci] - row,
+			      1, cinfo->image_width);
+	  }
+	}
+      }
+      *in_row_ctr += numrows;
+      prep->next_buf_row += numrows;
+      prep->rows_to_go -= numrows;
+    } else {
+      /* Return for more data, unless we are at the bottom of the image. */
+      if (prep->rows_to_go != 0)
+	break;
+      /* When at bottom of image, pad to fill the conversion buffer. */
+      if (prep->next_buf_row < prep->next_buf_stop) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			     prep->next_buf_row, prep->next_buf_stop);
+	}
+	prep->next_buf_row = prep->next_buf_stop;
+      }
+    }
+    /* If we've gotten enough data, downsample a row group. */
+    if (prep->next_buf_row == prep->next_buf_stop) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf,
+					(JDIMENSION) prep->this_row_group,
+					output_buf, *out_row_group_ctr);
+      (*out_row_group_ctr)++;
+      /* Advance pointers with wraparound as necessary. */
+      prep->this_row_group += cinfo->max_v_samp_factor;
+      if (prep->this_row_group >= buf_height)
+	prep->this_row_group = 0;
+      if (prep->next_buf_row >= buf_height)
+	prep->next_buf_row = 0;
+      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
+    }
+  }
+}
+
+
+/*
+ * Create the wrapped-around downsampling input buffer needed for context mode.
+ */
+
+LOCAL(void)
+create_context_buffer (j_compress_ptr cinfo)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int rgroup_height = cinfo->max_v_samp_factor;
+  int ci, i;
+  jpeg_component_info * compptr;
+  JSAMPARRAY true_buffer, fake_buffer;
+
+  /* Grab enough space for fake row pointers for all the components;
+   * we need five row groups' worth of pointers for each component.
+   */
+  fake_buffer = (JSAMPARRAY) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (cinfo->num_components * 5 * rgroup_height) * SIZEOF(JSAMPROW));
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate the actual buffer space (3 row groups) for this component.
+     * We make the buffer wide enough to allow the downsampler to edge-expand
+     * horizontally within the buffer, if it so chooses.
+     */
+    true_buffer = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) (((long) compptr->width_in_blocks *
+		      cinfo->min_DCT_h_scaled_size *
+		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+       (JDIMENSION) (3 * rgroup_height));
+    /* Copy true buffer row pointers into the middle of the fake row array */
+    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
+	    3 * rgroup_height * SIZEOF(JSAMPROW));
+    /* Fill in the above and below wraparound pointers */
+    for (i = 0; i < rgroup_height; i++) {
+      fake_buffer[i] = true_buffer[2 * rgroup_height + i];
+      fake_buffer[4 * rgroup_height + i] = true_buffer[i];
+    }
+    prep->color_buf[ci] = fake_buffer + rgroup_height;
+    fake_buffer += 5 * rgroup_height; /* point to space for next component */
+  }
+}
+
+#endif /* CONTEXT_ROWS_SUPPORTED */
+
+
+/*
+ * Initialize preprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_prep_ptr prep;
+  int ci;
+  jpeg_component_info * compptr;
+
+  if (need_full_buffer)		/* safety check */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  prep = (my_prep_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_prep_controller));
+  cinfo->prep = &prep->pub;
+  prep->pub.start_pass = start_pass_prep;
+
+  /* Allocate the color conversion buffer.
+   * We make the buffer wide enough to allow the downsampler to edge-expand
+   * horizontally within the buffer, if it so chooses.
+   */
+  if (cinfo->downsample->need_context_rows) {
+    /* Set up to provide context rows */
+#ifdef CONTEXT_ROWS_SUPPORTED
+    prep->pub.pre_process_data = pre_process_context;
+    create_context_buffer(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* No context, just make it tall enough for one row group */
+    prep->pub.pre_process_data = pre_process_data;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) (((long) compptr->width_in_blocks *
+			cinfo->min_DCT_h_scaled_size *
+			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/cmake/externals/libjpeg/jcsample.c b/cmake/externals/libjpeg/jcsample.c
new file mode 100644
index 000000000..2372c4173
--- /dev/null
+++ b/cmake/externals/libjpeg/jcsample.c
@@ -0,0 +1,545 @@
+/*
+ * jcsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2003-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains downsampling routines.
+ *
+ * Downsampling input data is counted in "row groups".  A row group
+ * is defined to be max_v_samp_factor pixel rows of each component,
+ * from which the downsampler produces v_samp_factor sample rows.
+ * A single row group is processed in each call to the downsampler module.
+ *
+ * The downsampler is responsible for edge-expansion of its output data
+ * to fill an integral number of DCT blocks horizontally.  The source buffer
+ * may be modified if it is helpful for this purpose (the source buffer is
+ * allocated wide enough to correspond to the desired output width).
+ * The caller (the prep controller) is responsible for vertical padding.
+ *
+ * The downsampler may request "context rows" by setting need_context_rows
+ * during startup.  In this case, the input arrays will contain at least
+ * one row group's worth of pixels above and below the passed-in data;
+ * the caller will create dummy rows at image top and bottom by replicating
+ * the first or last real pixel row.
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ *
+ * The downsampling algorithm used here is a simple average of the source
+ * pixels covered by the output pixel.  The hi-falutin sampling literature
+ * refers to this as a "box filter".  In general the characteristics of a box
+ * filter are not very good, but for the specific cases we normally use (1:1
+ * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
+ * nearly so bad.  If you intend to use other sampling ratios, you'd be well
+ * advised to improve this code.
+ *
+ * A simple input-smoothing capability is provided.  This is mainly intended
+ * for cleaning up color-dithered GIF input files (if you find it inadequate,
+ * we suggest using an external filtering program such as pnmconvol).  When
+ * enabled, each input pixel P is replaced by a weighted sum of itself and its
+ * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
+ * where SF = (smoothing_factor / 1024).
+ * Currently, smoothing is only supported for 2h2v sampling factors.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to downsample a single component */
+typedef JMETHOD(void, downsample1_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data));
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_downsampler pub;	/* public fields */
+
+  /* Downsampling method pointers, one per component */
+  downsample1_ptr methods[MAX_COMPONENTS];
+
+  /* Height of an output row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_downsample need not
+   * recompute them each time.  They are unused for other downsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_downsampler;
+
+typedef my_downsampler * my_downsample_ptr;
+
+
+/*
+ * Initialize for a downsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_downsample (j_compress_ptr cinfo)
+{
+  /* no work for now */
+}
+
+
+/*
+ * Expand a component horizontally from width input_cols to width output_cols,
+ * by duplicating the rightmost samples.
+ */
+
+LOCAL(void)
+expand_right_edge (JSAMPARRAY image_data, int num_rows,
+		   JDIMENSION input_cols, JDIMENSION output_cols)
+{
+  register JSAMPROW ptr;
+  register JSAMPLE pixval;
+  register int count;
+  int row;
+  int numcols = (int) (output_cols - input_cols);
+
+  if (numcols > 0) {
+    for (row = 0; row < num_rows; row++) {
+      ptr = image_data[row] + input_cols;
+      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
+      for (count = numcols; count > 0; count--)
+	*ptr++ = pixval;
+    }
+  }
+}
+
+
+/*
+ * Do downsampling for a whole row group (all components).
+ *
+ * In this version we simply downsample each component independently.
+ */
+
+METHODDEF(void)
+sep_downsample (j_compress_ptr cinfo,
+		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
+{
+  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JSAMPARRAY in_ptr, out_ptr;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    in_ptr = input_buf[ci] + in_row_index;
+    out_ptr = output_buf[ci] +
+	      (out_row_group_index * downsample->rowgroup_height[ci]);
+    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * One row group is processed per call.
+ * This version handles arbitrary integral sampling ratios, without smoothing.
+ * Note that this version is not actually used for customary sampling ratios.
+ */
+
+METHODDEF(void)
+int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
+  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
+  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  JSAMPROW inptr, outptr;
+  INT32 outvalue;
+
+  h_expand = downsample->h_expand[compptr->component_index];
+  v_expand = downsample->v_expand[compptr->component_index];
+  numpix = h_expand * v_expand;
+  numpix2 = numpix/2;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * h_expand);
+
+  inrow = outrow = 0;
+  while (inrow < cinfo->max_v_samp_factor) {
+    outptr = output_data[outrow];
+    for (outcol = 0, outcol_h = 0; outcol < output_cols;
+	 outcol++, outcol_h += h_expand) {
+      outvalue = 0;
+      for (v = 0; v < v_expand; v++) {
+	inptr = input_data[inrow+v] + outcol_h;
+	for (h = 0; h < h_expand; h++) {
+	  outvalue += (INT32) GETJSAMPLE(*inptr++);
+	}
+      }
+      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
+    }
+    inrow += v_expand;
+    outrow++;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  /* Copy the data */
+  jcopy_sample_rows(input_data, output_data,
+		    cinfo->max_v_samp_factor, cinfo->image_width);
+  /* Edge-expand */
+  expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
+		    compptr->width_in_blocks * compptr->DCT_h_scaled_size);
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the common case of 2:1 horizontal and 1:1 vertical,
+ * without smoothing.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    outptr = output_data[inrow];
+    inptr = input_data[inrow];
+    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
+			      + bias) >> 1);
+      bias ^= 1;		/* 0=>1, 1=>0 */
+      inptr += 2;
+    }
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr0, inptr1, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  inrow = outrow = 0;
+  while (inrow < cinfo->max_v_samp_factor) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow+1];
+    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
+			      + bias) >> 2);
+      bias ^= 3;		/* 1=>2, 2=>1 */
+      inptr0 += 2; inptr1 += 2;
+    }
+    inrow += 2;
+    outrow++;
+  }
+}
+
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+			JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols * 2);
+
+  /* We don't bother to form the individual "smoothed" input pixel values;
+   * we can directly compute the output which is the average of the four
+   * smoothed values.  Each of the four member pixels contributes a fraction
+   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
+   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
+   * output.  The four corner-adjacent neighbor pixels contribute a fraction
+   * SF to just one smoothed pixel, or SF/4 to the final output; while the
+   * eight edge-adjacent neighbors contribute SF to each of two smoothed
+   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
+   * factors are scaled by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
+  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
+
+  inrow = outrow = 0;
+  while (inrow < cinfo->max_v_samp_factor) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow+1];
+    above_ptr = input_data[inrow-1];
+    below_ptr = input_data[inrow+2];
+
+    /* Special case for first column: pretend column -1 is same as column 0 */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
+	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
+		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      /* sum of pixels directly mapped to this output element */
+      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+      /* sum of edge-neighbor pixels */
+      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
+		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+      /* The edge-neighbors count twice as much as corner-neighbors */
+      neighsum += neighsum;
+      /* Add in the corner-neighbors */
+      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
+		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+      /* form final output scaled up by 2^16 */
+      membersum = membersum * memberscale + neighsum * neighscale;
+      /* round, descale and output it */
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+    }
+
+    /* Special case for last column */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
+	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
+		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+    inrow += 2;
+    outrow++;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
+			    JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
+  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+  int colsum, lastcolsum, nextcolsum;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols);
+
+  /* Each of the eight neighbor pixels contributes a fraction SF to the
+   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
+   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
+  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    outptr = output_data[inrow];
+    inptr = input_data[inrow];
+    above_ptr = input_data[inrow-1];
+    below_ptr = input_data[inrow+1];
+
+    /* Special case for first column */
+    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
+	     GETJSAMPLE(*inptr);
+    membersum = GETJSAMPLE(*inptr++);
+    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		 GETJSAMPLE(*inptr);
+    neighsum = colsum + (colsum - membersum) + nextcolsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    lastcolsum = colsum; colsum = nextcolsum;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      membersum = GETJSAMPLE(*inptr++);
+      above_ptr++; below_ptr++;
+      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		   GETJSAMPLE(*inptr);
+      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
+      membersum = membersum * memberscale + neighsum * neighscale;
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      lastcolsum = colsum; colsum = nextcolsum;
+    }
+
+    /* Special case for last column */
+    membersum = GETJSAMPLE(*inptr);
+    neighsum = lastcolsum + (colsum - membersum) + colsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+  }
+}
+
+#endif /* INPUT_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Module initialization routine for downsampling.
+ * Note that we must select a routine for each component.
+ */
+
+GLOBAL(void)
+jinit_downsampler (j_compress_ptr cinfo)
+{
+  my_downsample_ptr downsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean smoothok = TRUE;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+  downsample = (my_downsample_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_downsampler));
+  cinfo->downsample = &downsample->pub;
+  downsample->pub.start_pass = start_pass_downsample;
+  downsample->pub.downsample = sep_downsample;
+  downsample->pub.need_context_rows = FALSE;
+
+  if (cinfo->CCIR601_sampling)
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, and set up method pointers */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "output group" for DCT scaling.  This many samples
+     * are to be converted from max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_out_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
+		  cinfo->min_DCT_h_scaled_size;
+    v_out_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+		  cinfo->min_DCT_v_scaled_size;
+    h_in_group = cinfo->max_h_samp_factor;
+    v_in_group = cinfo->max_v_samp_factor;
+    downsample->rowgroup_height[ci] = v_out_group; /* save for use later */
+    if (h_in_group == h_out_group && v_in_group == v_out_group) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = fullsize_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = fullsize_downsample;
+    } else if (h_in_group == h_out_group * 2 &&
+	       v_in_group == v_out_group) {
+      smoothok = FALSE;
+      downsample->methods[ci] = h2v1_downsample;
+    } else if (h_in_group == h_out_group * 2 &&
+	       v_in_group == v_out_group * 2) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = h2v2_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = h2v2_downsample;
+    } else if ((h_in_group % h_out_group) == 0 &&
+	       (v_in_group % v_out_group) == 0) {
+      smoothok = FALSE;
+      downsample->methods[ci] = int_downsample;
+      downsample->h_expand[ci] = (UINT8) (h_in_group / h_out_group);
+      downsample->v_expand[ci] = (UINT8) (v_in_group / v_out_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+  }
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  if (cinfo->smoothing_factor && !smoothok)
+    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
+#endif
+}
diff --git a/cmake/externals/libjpeg/jctrans.c b/cmake/externals/libjpeg/jctrans.c
new file mode 100644
index 000000000..261dd2996
--- /dev/null
+++ b/cmake/externals/libjpeg/jctrans.c
@@ -0,0 +1,399 @@
+/*
+ * jctrans.c
+ *
+ * Copyright (C) 1995-1998, Thomas G. Lane.
+ * Modified 2000-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding compression,
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
+ * The routines in jcapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transencode_master_selection
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+LOCAL(void) transencode_coef_controller
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+
+
+/*
+ * Compression initialization for writing raw-coefficient data.
+ * Before calling this, all parameters and a data destination must be set up.
+ * Call jpeg_finish_compress() to actually write the data.
+ *
+ * The number of passed virtual arrays must match cinfo->num_components.
+ * Note that the virtual arrays need not be filled or even realized at
+ * the time write_coefficients is called; indeed, if the virtual arrays
+ * were requested from this compression object's memory manager, they
+ * typically will be realized during this routine and filled afterwards.
+ */
+
+GLOBAL(void)
+jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Mark all tables to be written */
+  jpeg_suppress_tables(cinfo, FALSE);
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  transencode_master_selection(cinfo, coef_arrays);
+  /* Wait for jpeg_finish_compress() call */
+  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->global_state = CSTATE_WRCOEFS;
+}
+
+
+/*
+ * Initialize the compression object with default parameters,
+ * then copy from the source object all parameters needed for lossless
+ * transcoding.  Parameters that can be varied without loss (such as
+ * scan script and Huffman optimization) are left in their default states.
+ */
+
+GLOBAL(void)
+jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
+			       j_compress_ptr dstinfo)
+{
+  JQUANT_TBL ** qtblptr;
+  jpeg_component_info *incomp, *outcomp;
+  JQUANT_TBL *c_quant, *slot_quant;
+  int tblno, ci, coefi;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (dstinfo->global_state != CSTATE_START)
+    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
+  /* Copy fundamental image dimensions */
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+  dstinfo->jpeg_width = srcinfo->output_width;
+  dstinfo->jpeg_height = srcinfo->output_height;
+  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
+  /* Initialize all parameters to default values */
+  jpeg_set_defaults(dstinfo);
+  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
+   * Fix it to get the right header markers for the image colorspace.
+   * Note: Entropy table assignment in jpeg_set_colorspace
+   * depends on color_transform.
+   * Adaption is also required for setting the appropriate
+   * entropy coding mode dependent on image data precision.
+   */
+  dstinfo->color_transform = srcinfo->color_transform;
+  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->arith_code = srcinfo->data_precision > 8 ? TRUE : FALSE;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  /* Copy the source's quantization tables. */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
+      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
+      if (*qtblptr == NULL)
+	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+      MEMCOPY((*qtblptr)->quantval,
+	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
+	      SIZEOF((*qtblptr)->quantval));
+      (*qtblptr)->sent_table = FALSE;
+    }
+  }
+  /* Copy the source's per-component info.
+   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
+   */
+  dstinfo->num_components = srcinfo->num_components;
+  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
+	     MAX_COMPONENTS);
+  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
+       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
+    outcomp->component_id = incomp->component_id;
+    outcomp->h_samp_factor = incomp->h_samp_factor;
+    outcomp->v_samp_factor = incomp->v_samp_factor;
+    outcomp->quant_tbl_no = incomp->quant_tbl_no;
+    /* Make sure saved quantization table for component matches the qtable
+     * slot.  If not, the input file re-used this qtable slot.
+     * IJG encoder currently cannot duplicate this.
+     */
+    tblno = outcomp->quant_tbl_no;
+    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
+	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
+    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
+    c_quant = incomp->quant_table;
+    if (c_quant != NULL) {
+      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
+	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+      }
+    }
+    /* Note: we do not copy the source's entropy table assignments;
+     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
+     */
+  }
+  /* Also copy JFIF version and resolution information, if available.
+   * Strictly speaking this isn't "critical" info, but it's nearly
+   * always appropriate to copy it if available.  In particular,
+   * if the application chooses to copy JFIF 1.02 extension markers from
+   * the source file, we need to copy the version to make sure we don't
+   * emit a file that has 1.02 extensions but a claimed version of 1.01.
+   */
+  if (srcinfo->saw_JFIF_marker) {
+    if (srcinfo->JFIF_major_version == 1 ||
+	srcinfo->JFIF_major_version == 2) {
+      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+    }
+    dstinfo->density_unit = srcinfo->density_unit;
+    dstinfo->X_density = srcinfo->X_density;
+    dstinfo->Y_density = srcinfo->Y_density;
+  }
+}
+
+
+LOCAL(void)
+jpeg_calc_trans_dimensions (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+  if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size)
+    ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+	     cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size);
+
+  cinfo->block_size = cinfo->min_DCT_h_scaled_size;
+}
+
+
+/*
+ * Master selection of compression modules for transcoding.
+ * This substitutes for jcinit.c's initialization of the full compressor.
+ */
+
+LOCAL(void)
+transencode_master_selection (j_compress_ptr cinfo,
+			      jvirt_barray_ptr * coef_arrays)
+{
+  /* Do computations that are needed before master selection phase */
+  jpeg_calc_trans_dimensions(cinfo);
+
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, TRUE /* transcode only */);
+
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_encoder(cinfo);
+  else {
+    jinit_huff_encoder(cinfo);
+  }
+
+  /* We need a special coefficient buffer controller. */
+  transencode_coef_controller(cinfo, coef_arrays);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI, JFIF) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
+
+
+/*
+ * The rest of this file is a special implementation of the coefficient
+ * buffer controller.  This is similar to jccoefct.c, but it handles only
+ * output from presupplied virtual arrays.  Furthermore, we generate any
+ * dummy padding blocks on-the-fly rather than expecting them to be present
+ * in the arrays.
+ */
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* Virtual block array for each component. */
+  jvirt_barray_ptr * whole_image;
+
+  /* Workspace for constructing dummy blocks at right/bottom edges. */
+  JBLOCK dummy_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->MCU_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  if (pass_mode != JBUF_CRANK_DEST)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Process some data.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, blockcnt;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yoffset + yindex < compptr->last_row_height) {
+	    /* Fill in pointers to real blocks in this row */
+	    buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
+	    xindex = blockcnt;
+	    do {
+	      MCU_buffer[blkn++] = buffer_ptr++;
+	    } while (--xindex);
+	    /* Dummy blocks at right edge */
+	    if ((xindex = compptr->MCU_width - blockcnt) == 0)
+	      continue;
+	  } else {
+	    /* At bottom of image, need a whole row of dummy blocks */
+	    xindex = compptr->MCU_width;
+	  }
+	  /* Fill in any dummy blocks needed in this row.
+	   * Dummy blocks are filled in the same way as in jccoefct.c:
+	   * all zeroes in the AC entries, DC entries equal to previous
+	   * block's DC value.  The init routine has already zeroed the
+	   * AC entries, so we need only set the DC entries correctly.
+	   */
+	  buffer_ptr = coef->dummy_buffer + blkn;
+	  do {
+	    buffer_ptr[0][0] = MCU_buffer[blkn-1][0][0];
+	    MCU_buffer[blkn++] = buffer_ptr++;
+	  } while (--xindex);
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Initialize coefficient buffer controller.
+ *
+ * Each passed coefficient array must be the right size for that
+ * coefficient: width_in_blocks wide and height_in_blocks high,
+ * with unitheight at least v_samp_factor.
+ */
+
+LOCAL(void)
+transencode_coef_controller (j_compress_ptr cinfo,
+			     jvirt_barray_ptr * coef_arrays)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
+  cinfo->coef = &coef->pub;
+  coef->pub.start_pass = start_pass_coef;
+  coef->pub.compress_data = compress_output;
+
+  /* Save pointer to virtual arrays */
+  coef->whole_image = coef_arrays;
+
+  /* Pre-zero space for dummy DCT blocks */
+  MEMZERO(coef->dummy_buffer, SIZEOF(coef->dummy_buffer));
+}
diff --git a/cmake/externals/libjpeg/jdapimin.c b/cmake/externals/libjpeg/jdapimin.c
new file mode 100644
index 000000000..785e52722
--- /dev/null
+++ b/cmake/externals/libjpeg/jdapimin.c
@@ -0,0 +1,412 @@
+/*
+ * jdapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2009-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-decompression case or the
+ * transcoding-only case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
+ * shared by compression and decompression, and jdtrans.c for the transcoding
+ * case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG decompression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_decompress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = TRUE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->src = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  /* Initialize marker processor so application can override methods
+   * for COM, APPn markers before calling jpeg_read_header.
+   */
+  cinfo->marker_list = NULL;
+  jinit_marker_reader(cinfo);
+
+  /* And initialize the overall input controller. */
+  jinit_input_controller(cinfo);
+
+  /* OK, I'm ready */
+  cinfo->global_state = DSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG decompression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG decompression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Set default decompression parameters.
+ */
+
+LOCAL(void)
+default_decompress_parms (j_decompress_ptr cinfo)
+{
+  int cid0, cid1, cid2, cid3;
+
+  /* Guess the input colorspace, and set output colorspace accordingly. */
+  /* Note application may override our guesses. */
+  switch (cinfo->num_components) {
+  case 1:
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+    break;
+
+  case 3:
+    cid0 = cinfo->comp_info[0].component_id;
+    cid1 = cinfo->comp_info[1].component_id;
+    cid2 = cinfo->comp_info[2].component_id;
+
+    /* For robust detection of standard colorspaces
+     * regardless of the presence of special markers,
+     * check component IDs from SOF marker first.
+     */
+    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03)
+      cinfo->jpeg_color_space = JCS_YCbCr;
+    else if (cid0 == 0x01 && cid1 == 0x22 && cid2 == 0x23)
+      cinfo->jpeg_color_space = JCS_BG_YCC;
+    else if (cid0 == 0x52 && cid1 == 0x47 && cid2 == 0x42)
+      cinfo->jpeg_color_space = JCS_RGB;	/* ASCII 'R', 'G', 'B' */
+    else if (cid0 == 0x72 && cid1 == 0x67 && cid2 == 0x62)
+      cinfo->jpeg_color_space = JCS_BG_RGB;	/* ASCII 'r', 'g', 'b' */
+    else if (cinfo->saw_JFIF_marker)
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+    else if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_RGB;
+	break;
+      case 1:
+	cinfo->jpeg_color_space = JCS_YCbCr;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+      }
+    } else {
+      TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+    }
+    /* Always guess RGB is proper output colorspace. */
+    cinfo->out_color_space = JCS_RGB;
+    break;
+
+  case 4:
+    cid0 = cinfo->comp_info[0].component_id;
+    cid1 = cinfo->comp_info[1].component_id;
+    cid2 = cinfo->comp_info[2].component_id;
+    cid3 = cinfo->comp_info[3].component_id;
+
+    /* For robust detection of standard colorspaces
+     * regardless of the presence of special markers,
+     * check component IDs from SOF marker first.
+     */
+    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03 && cid3 == 0x04)
+      cinfo->jpeg_color_space = JCS_YCCK;
+    else if (cid0 == 0x43 && cid1 == 0x4D && cid2 == 0x59 && cid3 == 0x4B)
+      cinfo->jpeg_color_space = JCS_CMYK;   /* ASCII 'C', 'M', 'Y', 'K' */
+    else if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_CMYK;
+	break;
+      case 2:
+	cinfo->jpeg_color_space = JCS_YCCK;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCCK;	/* assume it's YCCK */
+      }
+    } else {
+      /* Unknown IDs and no special markers, assume straight CMYK. */
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+    break;
+
+  default:
+    cinfo->jpeg_color_space = JCS_UNKNOWN;
+    cinfo->out_color_space = JCS_UNKNOWN;
+  }
+
+  /* Set defaults for other decompression parameters. */
+  cinfo->scale_num = cinfo->block_size;		/* 1:1 scaling */
+  cinfo->scale_denom = cinfo->block_size;
+  cinfo->output_gamma = 1.0;
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  cinfo->quantize_colors = FALSE;
+  /* We set these in case application only sets quantize_colors. */
+  cinfo->dither_mode = JDITHER_FS;
+#ifdef QUANT_2PASS_SUPPORTED
+  cinfo->two_pass_quantize = TRUE;
+#else
+  cinfo->two_pass_quantize = FALSE;
+#endif
+  cinfo->desired_number_of_colors = 256;
+  cinfo->colormap = NULL;
+  /* Initialize for no mode change in buffered-image mode. */
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+}
+
+
+/*
+ * Decompression startup: read start of JPEG datastream to see what's there.
+ * Need only initialize JPEG object and supply a data source before calling.
+ *
+ * This routine will read as far as the first SOS marker (ie, actual start of
+ * compressed data), and will save all tables and parameters in the JPEG
+ * object.  It will also initialize the decompression parameters to default
+ * values, and finally return JPEG_HEADER_OK.  On return, the application may
+ * adjust the decompression parameters and then call jpeg_start_decompress.
+ * (Or, if the application only wanted to determine the image parameters,
+ * the data need not be decompressed.  In that case, call jpeg_abort or
+ * jpeg_destroy to release any temporary space.)
+ * If an abbreviated (tables only) datastream is presented, the routine will
+ * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
+ * re-use the JPEG object to read the abbreviated image datastream(s).
+ * It is unnecessary (but OK) to call jpeg_abort in this case.
+ * The JPEG_SUSPENDED return code only occurs if the data source module
+ * requests suspension of the decompressor.  In this case the application
+ * should load more source data and then re-call jpeg_read_header to resume
+ * processing.
+ * If a non-suspending data source is used and require_image is TRUE, then the
+ * return code need not be inspected since only JPEG_HEADER_OK is possible.
+ *
+ * This routine is now just a front end to jpeg_consume_input, with some
+ * extra error checking.
+ */
+
+GLOBAL(int)
+jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
+{
+  int retcode;
+
+  if (cinfo->global_state != DSTATE_START &&
+      cinfo->global_state != DSTATE_INHEADER)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  retcode = jpeg_consume_input(cinfo);
+
+  switch (retcode) {
+  case JPEG_REACHED_SOS:
+    retcode = JPEG_HEADER_OK;
+    break;
+  case JPEG_REACHED_EOI:
+    if (require_image)		/* Complain if application wanted an image */
+      ERREXIT(cinfo, JERR_NO_IMAGE);
+    /* Reset to start state; it would be safer to require the application to
+     * call jpeg_abort, but we can't change it now for compatibility reasons.
+     * A side effect is to free any temporary memory (there shouldn't be any).
+     */
+    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
+    retcode = JPEG_HEADER_TABLES_ONLY;
+    break;
+  case JPEG_SUSPENDED:
+    /* no work */
+    break;
+  }
+
+  return retcode;
+}
+
+
+/*
+ * Consume data in advance of what the decompressor requires.
+ * This can be called at any time once the decompressor object has
+ * been created and a data source has been set up.
+ *
+ * This routine is essentially a state machine that handles a couple
+ * of critical state-transition actions, namely initial setup and
+ * transition from header scanning to ready-for-start_decompress.
+ * All the actual input is done via the input controller's consume_input
+ * method.
+ */
+
+GLOBAL(int)
+jpeg_consume_input (j_decompress_ptr cinfo)
+{
+  int retcode = JPEG_SUSPENDED;
+
+  /* NB: every possible DSTATE value should be listed in this switch */
+  switch (cinfo->global_state) {
+  case DSTATE_START:
+    /* Start-of-datastream actions: reset appropriate modules */
+    (*cinfo->inputctl->reset_input_controller) (cinfo);
+    /* Initialize application's data source module */
+    (*cinfo->src->init_source) (cinfo);
+    cinfo->global_state = DSTATE_INHEADER;
+    /*FALLTHROUGH*/
+  case DSTATE_INHEADER:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
+      /* Set up default parameters based on header data */
+      default_decompress_parms(cinfo);
+      /* Set global state: ready for start_decompress */
+      cinfo->global_state = DSTATE_READY;
+    }
+    break;
+  case DSTATE_READY:
+    /* Can't advance past first SOS until start_decompress is called */
+    retcode = JPEG_REACHED_SOS;
+    break;
+  case DSTATE_PRELOAD:
+  case DSTATE_PRESCAN:
+  case DSTATE_SCANNING:
+  case DSTATE_RAW_OK:
+  case DSTATE_BUFIMAGE:
+  case DSTATE_BUFPOST:
+  case DSTATE_STOPPING:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    break;
+  default:
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  return retcode;
+}
+
+
+/*
+ * Have we finished reading the input file?
+ */
+
+GLOBAL(boolean)
+jpeg_input_complete (j_decompress_ptr cinfo)
+{
+  /* Check for valid jpeg object */
+  if (cinfo->global_state < DSTATE_START ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->eoi_reached;
+}
+
+
+/*
+ * Is there more than one scan?
+ */
+
+GLOBAL(boolean)
+jpeg_has_multiple_scans (j_decompress_ptr cinfo)
+{
+  /* Only valid after jpeg_read_header completes */
+  if (cinfo->global_state < DSTATE_READY ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->has_multiple_scans;
+}
+
+
+/*
+ * Finish JPEG decompression.
+ *
+ * This will normally just verify the file trailer and release temp storage.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_decompress (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
+    /* Terminate final pass of non-buffered mode */
+    if (cinfo->output_scanline < cinfo->output_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
+    /* Finishing after a buffered-image operation */
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state != DSTATE_STOPPING) {
+    /* STOPPING = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read until EOI */
+  while (! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  /* Do final cleanup */
+  (*cinfo->src->term_source) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+  return TRUE;
+}
diff --git a/cmake/externals/libjpeg/jdapistd.c b/cmake/externals/libjpeg/jdapistd.c
new file mode 100644
index 000000000..7f3a78b25
--- /dev/null
+++ b/cmake/externals/libjpeg/jdapistd.c
@@ -0,0 +1,276 @@
+/*
+ * jdapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-decompression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
+ * We thus must separate this file from jdapimin.c to avoid linking the
+ * whole decompression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * If a multipass operating mode was selected, this will do all but the
+ * last pass, and thus may take a great deal of time.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_start_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      /* No more work here; expecting jpeg_start_output next */
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    /* If file has multiple scans, absorb them all into the coef buffer */
+    if (cinfo->inputctl->has_multiple_scans) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+      for (;;) {
+	int retcode;
+	/* Call progress monitor hook if present */
+	if (cinfo->progress != NULL)
+	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+	/* Absorb some more input */
+	retcode = (*cinfo->inputctl->consume_input) (cinfo);
+	if (retcode == JPEG_SUSPENDED)
+	  return FALSE;
+	if (retcode == JPEG_REACHED_EOI)
+	  break;
+	/* Advance progress counter if appropriate */
+	if (cinfo->progress != NULL &&
+	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	    /* jdmaster underestimated number of scans; ratchet up one scan */
+	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	  }
+	}
+      }
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+    }
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Set up for an output pass, and perform any dummy pass(es) needed.
+ * Common subroutine for jpeg_start_decompress and jpeg_start_output.
+ * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
+ * Exit: If done, returns TRUE and sets global_state for proper output mode.
+ *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
+ */
+
+LOCAL(boolean)
+output_pass_setup (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state != DSTATE_PRESCAN) {
+    /* First call: do pass setup */
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+    cinfo->global_state = DSTATE_PRESCAN;
+  }
+  /* Loop over any required dummy passes */
+  while (cinfo->master->is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Crank through the dummy pass */
+    while (cinfo->output_scanline < cinfo->output_height) {
+      JDIMENSION last_scanline;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+	cinfo->progress->pass_limit = (long) cinfo->output_height;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* Process some data */
+      last_scanline = cinfo->output_scanline;
+      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
+				    &cinfo->output_scanline, (JDIMENSION) 0);
+      if (cinfo->output_scanline == last_scanline)
+	return FALSE;		/* No progress made, must suspend */
+    }
+    /* Finish up dummy pass, and set up for another one */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  }
+  /* Ready for application to drive output pass through
+   * jpeg_read_scanlines or jpeg_read_raw_data.
+   */
+  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
+  return TRUE;
+}
+
+
+/*
+ * Read some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually read.
+ * This may be less than the number requested in several cases,
+ * including bottom of image, data source suspension, and operating
+ * modes that emit multiple scanlines at a time.
+ *
+ * Note: we warn about excess calls to jpeg_read_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+		     JDIMENSION max_lines)
+{
+  JDIMENSION row_ctr;
+
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Process some data */
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
+  cinfo->output_scanline += row_ctr;
+  return row_ctr;
+}
+
+
+/*
+ * Alternate entry point to read raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
+		    JDIMENSION max_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->global_state != DSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Verify that at least one iMCU row can be returned. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
+  if (max_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Decompress directly into user's buffer. */
+  if (! (*cinfo->coef->decompress_data) (cinfo, data))
+    return 0;			/* suspension forced, can do nothing more */
+
+  /* OK, we processed one iMCU row. */
+  cinfo->output_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
+
+
+/* Additional entry points for buffered-image mode. */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Initialize for an output pass in buffered-image mode.
+ */
+
+GLOBAL(boolean)
+jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
+{
+  if (cinfo->global_state != DSTATE_BUFIMAGE &&
+      cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Limit scan number to valid range */
+  if (scan_number <= 0)
+    scan_number = 1;
+  if (cinfo->inputctl->eoi_reached &&
+      scan_number > cinfo->input_scan_number)
+    scan_number = cinfo->input_scan_number;
+  cinfo->output_scan_number = scan_number;
+  /* Perform any dummy output passes, and set up for the real pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Finish up after an output pass in buffered-image mode.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_output (j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
+    /* Terminate this pass. */
+    /* We do not require the whole pass to have been completed. */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_BUFPOST;
+  } else if (cinfo->global_state != DSTATE_BUFPOST) {
+    /* BUFPOST = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read markers looking for SOS or EOI */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  cinfo->global_state = DSTATE_BUFIMAGE;
+  return TRUE;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jdarith.c b/cmake/externals/libjpeg/jdarith.c
new file mode 100644
index 000000000..2c9abe23f
--- /dev/null
+++ b/cmake/externals/libjpeg/jdarith.c
@@ -0,0 +1,796 @@
+/*
+ * jdarith.c
+ *
+ * Developed 1997-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy decoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy decoder object for arithmetic decoding. */
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  INT32 c;       /* C register, base of coding interval + input bit buffer */
+  INT32 a;               /* A register, normalized size of coding interval */
+  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
+                                                         /* init: ct = -16 */
+                                                         /* run: ct = 0..7 */
+                                                         /* error: ct = -1 */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char * dc_stats[NUM_ARITH_TBLS];
+  unsigned char * ac_stats[NUM_ARITH_TBLS];
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];
+} arith_entropy_decoder;
+
+typedef arith_entropy_decoder * arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS 64
+#define AC_STAT_BINS 256
+
+
+LOCAL(int)
+get_byte (j_decompress_ptr cinfo)
+/* Read next input byte; we do not support suspension in this module. */
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+
+  if (src->bytes_in_buffer == 0)
+    if (! (*src->fill_input_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  src->bytes_in_buffer--;
+  return GETJOCTET(*src->next_input_byte++);
+}
+
+
+/*
+ * The core arithmetic decoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Return value is 0 or 1 (binary decision).
+ *
+ * Note: I've changed the handling of the code base & bit
+ * buffer register C compared to other implementations
+ * based on the standards layout & procedures.
+ * While it also contains both the actual base of the
+ * coding interval (16 bits) and the next-bits buffer,
+ * the cut-point between these two parts is floating
+ * (instead of fixed) with the bit shift counter CT.
+ * Thus, we also need only one (variable instead of
+ * fixed size) shift for the LPS/MPS decision, and
+ * we can do away with any renormalization update
+ * of C (except for new data insertion, of course).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(int)
+arith_decode (j_decompress_ptr cinfo, unsigned char *st)
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  register unsigned char nl, nm;
+  register INT32 qe, temp;
+  register int sv, data;
+
+  /* Renormalization & data input per section D.2.6 */
+  while (e->a < 0x8000L) {
+    if (--e->ct < 0) {
+      /* Need to fetch next data byte */
+      if (cinfo->unread_marker)
+	data = 0;		/* stuff zero data */
+      else {
+	data = get_byte(cinfo);	/* read next input byte */
+	if (data == 0xFF) {	/* zero stuff or marker code */
+	  do data = get_byte(cinfo);
+	  while (data == 0xFF);	/* swallow extra 0xFF bytes */
+	  if (data == 0)
+	    data = 0xFF;	/* discard stuffed zero byte */
+	  else {
+	    /* Note: Different from the Huffman decoder, hitting
+	     * a marker while processing the compressed data
+	     * segment is legal in arithmetic coding.
+	     * The convention is to supply zero data
+	     * then until decoding is complete.
+	     */
+	    cinfo->unread_marker = data;
+	    data = 0;
+	  }
+	}
+      }
+      e->c = (e->c << 8) | data; /* insert data into C register */
+      if ((e->ct += 8) < 0)	 /* update bit shift counter */
+	/* Need more initial bytes */
+	if (++e->ct == 0)
+	  /* Got 2 initial bytes -> re-init A and exit loop */
+	  e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
+    }
+    e->a <<= 1;
+  }
+
+  /* Fetch values from our compact representation of Table D.3(D.2):
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;
+  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
+
+  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
+  temp = e->a - qe;
+  e->a = temp;
+  temp <<= e->ct;
+  if (e->c >= temp) {
+    e->c -= temp;
+    /* Conditional LPS (less probable symbol) exchange */
+    if (e->a < qe) {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+    } else {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+      sv ^= 0x80;		/* Exchange LPS/MPS */
+    }
+  } else if (e->a < 0x8000L) {
+    /* Conditional MPS (more probable symbol) exchange */
+    if (e->a < qe) {
+      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
+      sv ^= 0x80;		/* Exchange LPS/MPS */
+    } else {
+      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
+    }
+  }
+
+  return sv >> 7;
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ */
+
+LOCAL(void)
+process_restart (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci;
+  jpeg_component_info * compptr;
+
+  /* Advance past the RSTn marker */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Re-initialize statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      /* Reset DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
+	(cinfo->progressive_mode && cinfo->Ss)) {
+      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+    }
+  }
+
+  /* Reset arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Arithmetic MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * arithmetic-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == (int) 0x8000U) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;		   /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      entropy->last_dc_val[ci] += v;
+    }
+
+    /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */
+    (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, sign, k;
+  int v, m;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+  /* Figure F.20: Decode_AC_coefficients */
+  k = cinfo->Ss - 1;
+  do {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (arith_decode(cinfo, st)) break;		/* EOB flag */
+    for (;;) {
+      k++;
+      if (arith_decode(cinfo, st + 1)) break;
+      st += 3;
+      if (k >= cinfo->Se) {
+	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	entropy->ct = -1;			/* spectral overflow */
+	return TRUE;
+      }
+    }
+    /* Figure F.21: Decoding nonzero value v */
+    /* Figure F.22: Decoding the sign of v */
+    sign = arith_decode(cinfo, entropy->fixed_bin);
+    st += 2;
+    /* Figure F.23: Decoding the magnitude category of v */
+    if ((m = arith_decode(cinfo, st)) != 0) {
+      if (arith_decode(cinfo, st)) {
+	m <<= 1;
+	st = entropy->ac_stats[tbl] +
+	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == (int) 0x8000U) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+    }
+    v = m;
+    /* Figure F.24: Decoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      if (arith_decode(cinfo, st)) v |= m;
+    v += 1; if (sign) v = -v;
+    /* Scale and output coefficient in natural (dezigzagged) order */
+    (*block)[natural_order[k]] = (JCOEF) (v << cinfo->Al);
+  } while (k < cinfo->Se);
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  unsigned char *st;
+  JCOEF p1;
+  int blkn;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  st = entropy->fixed_bin;	/* use fixed probability estimation */
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* Encoded data is simply the next bit of the two's-complement DC value */
+    if (arith_decode(cinfo, st))
+      MCU_data[blkn][0][0] |= p1;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  unsigned char *st;
+  int tbl, k, kex;
+  JCOEF p1, m1;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+  m1 = -p1;			/* -1 in the bit position being coded */
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  kex = cinfo->Se;
+  do {
+    if ((*block)[natural_order[kex]]) break;
+  } while (--kex);
+
+  k = cinfo->Ss - 1;
+  do {
+    st = entropy->ac_stats[tbl] + 3 * k;
+    if (k >= kex)
+      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+    for (;;) {
+      thiscoef = *block + natural_order[++k];
+      if (*thiscoef) {				/* previously nonzero coef */
+	if (arith_decode(cinfo, st + 2)) {
+	  if (*thiscoef < 0)
+	    *thiscoef += m1;
+	  else
+	    *thiscoef += p1;
+	}
+	break;
+      }
+      if (arith_decode(cinfo, st + 1)) {	/* newly nonzero coef */
+	if (arith_decode(cinfo, entropy->fixed_bin))
+	  *thiscoef = m1;
+	else
+	  *thiscoef = p1;
+	break;
+      }
+      st += 3;
+      if (k >= cinfo->Se) {
+	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	entropy->ct = -1;			/* spectral overflow */
+	return TRUE;
+      }
+    }
+  } while (k < cinfo->Se);
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign, k;
+  int v, m;
+  const int * natural_order;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
+
+  natural_order = cinfo->natural_order;
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
+	while (arith_decode(cinfo, st)) {
+	  if ((m <<= 1) == (int) 0x8000U) {
+	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	    entropy->ct = -1;			/* magnitude overflow */
+	    return TRUE;
+	  }
+	  st += 1;
+	}
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+	entropy->dc_context[ci] = 0;		   /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      entropy->last_dc_val[ci] += v;
+    }
+
+    (*block)[0] = (JCOEF) entropy->last_dc_val[ci];
+
+    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+    if (cinfo->lim_Se == 0) continue;
+    tbl = compptr->ac_tbl_no;
+    k = 0;
+
+    /* Figure F.20: Decode_AC_coefficients */
+    do {
+      st = entropy->ac_stats[tbl] + 3 * k;
+      if (arith_decode(cinfo, st)) break;	/* EOB flag */
+      for (;;) {
+	k++;
+	if (arith_decode(cinfo, st + 1)) break;
+	st += 3;
+	if (k >= cinfo->lim_Se) {
+	  WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	  entropy->ct = -1;			/* spectral overflow */
+	  return TRUE;
+	}
+      }
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, entropy->fixed_bin);
+      st += 2;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+	if (arith_decode(cinfo, st)) {
+	  m <<= 1;
+	  st = entropy->ac_stats[tbl] +
+	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+	  while (arith_decode(cinfo, st)) {
+	    if ((m <<= 1) == (int) 0x8000U) {
+	      WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+	      entropy->ct = -1;			/* magnitude overflow */
+	      return TRUE;
+	    }
+	    st += 1;
+	  }
+	}
+      }
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+	if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;
+      (*block)[natural_order[k]] = (JCOEF) v;
+    } while (k < cinfo->lim_Se);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)
+	goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
+	goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+	goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah-1 != cinfo->Al)
+	goto bad;
+    }
+    if (cinfo->Al > 13) {	/* need not check for < 0 */
+      bad:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+	if (cinfo->Ah != expected)
+	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+	coef_bit_ptr[coefi] = cinfo->Al;
+      }
+    }
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning.
+     */
+    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
+	(cinfo->Se < DCTSIZE2 && cinfo->Se != cinfo->lim_Se))
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+    /* Select MCU decoding routine */
+    entropy->pub.decode_mcu = decode_mcu;
+  }
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)
+	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
+	(cinfo->progressive_mode && cinfo->Ss)) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)
+	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+    }
+  }
+
+  /* Initialize arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_arith_decoder (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy;
+  int i;
+
+  entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(arith_entropy_decoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass;
+  entropy->pub.finish_pass = finish_pass;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    entropy->dc_stats[i] = NULL;
+    entropy->ac_stats[i] = NULL;
+  }
+
+  /* Initialize index for fixed probability estimation */
+  entropy->fixed_bin[0] = 113;
+
+  if (cinfo->progressive_mode) {
+    /* Create progression status table */
+    int *coef_bit_ptr, ci;
+    cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       cinfo->num_components * DCTSIZE2 * SIZEOF(int));
+    coef_bit_ptr = & cinfo->coef_bits[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      for (i = 0; i < DCTSIZE2; i++)
+	*coef_bit_ptr++ = -1;
+  }
+}
diff --git a/cmake/externals/libjpeg/jdatadst.c b/cmake/externals/libjpeg/jdatadst.c
new file mode 100644
index 000000000..b3b4798ea
--- /dev/null
+++ b/cmake/externals/libjpeg/jdatadst.c
@@ -0,0 +1,263 @@
+/*
+ * jdatadst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to memory or to a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+
+/* Expanded data destination object for stdio output */
+
+#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  FILE * outfile;		/* target stream */
+  JOCTET buffer[OUTPUT_BUF_SIZE]; /* output buffer */
+} my_destination_mgr;
+
+typedef my_destination_mgr * my_dest_ptr;
+
+
+/* Expanded data destination object for memory output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  unsigned char ** outbuffer;	/* target buffer */
+  size_t * outsize;
+  unsigned char * newbuffer;	/* newly allocated buffer */
+  JOCTET * buffer;		/* start of buffer */
+  size_t bufsize;
+} my_mem_destination_mgr;
+
+typedef my_mem_destination_mgr * my_mem_dest_ptr;
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+}
+
+METHODDEF(void)
+init_mem_destination (j_compress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_output_buffer (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+
+  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
+      (size_t) OUTPUT_BUF_SIZE)
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+
+  return TRUE;
+}
+
+METHODDEF(boolean)
+empty_mem_output_buffer (j_compress_ptr cinfo)
+{
+  size_t nextsize;
+  JOCTET * nextbuffer;
+  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
+
+  /* Try to allocate new buffer with double size */
+  nextsize = dest->bufsize * 2;
+  nextbuffer = (JOCTET *) malloc(nextsize);
+
+  if (nextbuffer == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 11);
+
+  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
+
+  if (dest->newbuffer != NULL)
+    free(dest->newbuffer);
+
+  dest->newbuffer = nextbuffer;
+
+  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
+  dest->pub.free_in_buffer = dest->bufsize;
+
+  dest->buffer = nextbuffer;
+  dest->bufsize = nextsize;
+
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
+  size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
+
+  /* Write any data remaining in the buffer */
+  if (datacount > 0) {
+    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
+      ERREXIT(cinfo, JERR_FILE_WRITE);
+  }
+  JFFLUSH(dest->outfile);
+  /* Make sure we wrote the output file OK */
+  if (JFERROR(dest->outfile))
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+}
+
+METHODDEF(void)
+term_mem_destination (j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
+
+  *dest->outbuffer = dest->buffer;
+  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
+}
+
+
+/*
+ * Prepare for output to a stdio stream.
+ * The caller must have already opened the stream,
+ * and is responsible for closing it after finishing compression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
+{
+  my_dest_ptr dest;
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same file without re-executing jpeg_stdio_dest.
+   * This makes it dangerous to use this manager and a different destination
+   * manager serially with the same JPEG object, because their private object
+   * sizes may be different.  Caveat programmer.
+   */
+  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_destination_mgr));
+  }
+
+  dest = (my_dest_ptr) cinfo->dest;
+  dest->pub.init_destination = init_destination;
+  dest->pub.empty_output_buffer = empty_output_buffer;
+  dest->pub.term_destination = term_destination;
+  dest->outfile = outfile;
+}
+
+
+/*
+ * Prepare for output to a memory buffer.
+ * The caller may supply an own initial buffer with appropriate size.
+ * Otherwise, or when the actual data output exceeds the given size,
+ * the library adapts the buffer size as necessary.
+ * The standard library functions malloc/free are used for allocating
+ * larger memory, so the buffer is available to the application after
+ * finishing compression, and then the application is responsible for
+ * freeing the requested memory.
+ * Note:  An initial buffer supplied by the caller is expected to be
+ * managed by the application.  The library does not free such buffer
+ * when allocating a larger buffer.
+ */
+
+GLOBAL(void)
+jpeg_mem_dest (j_compress_ptr cinfo,
+	       unsigned char ** outbuffer, size_t * outsize)
+{
+  my_mem_dest_ptr dest;
+
+  if (outbuffer == NULL || outsize == NULL)	/* sanity check */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same buffer without re-executing jpeg_mem_dest.
+   */
+  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_mem_destination_mgr));
+  }
+
+  dest = (my_mem_dest_ptr) cinfo->dest;
+  dest->pub.init_destination = init_mem_destination;
+  dest->pub.empty_output_buffer = empty_mem_output_buffer;
+  dest->pub.term_destination = term_mem_destination;
+  dest->outbuffer = outbuffer;
+  dest->outsize = outsize;
+  dest->newbuffer = NULL;
+
+  if (*outbuffer == NULL || *outsize == 0) {
+    /* Allocate initial buffer */
+    dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
+    if (dest->newbuffer == NULL)
+      ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+    *outsize = OUTPUT_BUF_SIZE;
+  }
+
+  dest->pub.next_output_byte = dest->buffer = *outbuffer;
+  dest->pub.free_in_buffer = dest->bufsize = *outsize;
+}
diff --git a/cmake/externals/libjpeg/jdatasrc.c b/cmake/externals/libjpeg/jdatasrc.c
new file mode 100644
index 000000000..fd7a1a594
--- /dev/null
+++ b/cmake/externals/libjpeg/jdatasrc.c
@@ -0,0 +1,271 @@
+/*
+ * jdatasrc.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from memory or from a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data source object for stdio input */
+
+#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+
+typedef struct {
+  struct jpeg_source_mgr pub;	/* public fields */
+
+  FILE * infile;		/* source stream */
+  JOCTET buffer[INPUT_BUF_SIZE]; /* input buffer */
+  boolean start_of_file;	/* have we gotten any data yet? */
+} my_source_mgr;
+
+typedef my_source_mgr * my_src_ptr;
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_source (j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr) cinfo->src;
+
+  /* We reset the empty-input-file flag for each image,
+   * but we don't clear the input buffer.
+   * This is correct behavior for reading a series of images from one source.
+   */
+  src->start_of_file = TRUE;
+}
+
+METHODDEF(void)
+init_mem_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend compression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_input_buffer (j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr) cinfo->src;
+  size_t nbytes;
+
+  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
+
+  if (nbytes <= 0) {
+    if (src->start_of_file)	/* Treat empty input file as fatal error */
+      ERREXIT(cinfo, JERR_INPUT_EMPTY);
+    WARNMS(cinfo, JWRN_JPEG_EOF);
+    /* Insert a fake EOI marker */
+    src->buffer[0] = (JOCTET) 0xFF;
+    src->buffer[1] = (JOCTET) JPEG_EOI;
+    nbytes = 2;
+  }
+
+  src->pub.next_input_byte = src->buffer;
+  src->pub.bytes_in_buffer = nbytes;
+  src->start_of_file = FALSE;
+
+  return TRUE;
+}
+
+METHODDEF(boolean)
+fill_mem_input_buffer (j_decompress_ptr cinfo)
+{
+  static const JOCTET mybuffer[4] = {
+    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
+  };
+
+  /* The whole JPEG data is expected to reside in the supplied memory
+   * buffer, so any request for more data beyond the given buffer size
+   * is treated as an error.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  /* Insert a fake EOI marker */
+
+  cinfo->src->next_input_byte = mybuffer;
+  cinfo->src->bytes_in_buffer = 2;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+  size_t nbytes;
+
+  /* Just a dumb implementation for now.  Could use fseek() except
+   * it doesn't work on pipes.  Not clear that being smart is worth
+   * any trouble anyway --- large skips are infrequent.
+   */
+  if (num_bytes > 0) {
+    nbytes = (size_t) num_bytes;
+    while (nbytes > src->bytes_in_buffer) {
+      nbytes -= src->bytes_in_buffer;
+      (void) (*src->fill_input_buffer) (cinfo);
+      /* note we assume that fill_input_buffer will never return FALSE,
+       * so suspension need not be handled.
+       */
+    }
+    src->next_input_byte += nbytes;
+    src->bytes_in_buffer -= nbytes;
+  }
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Prepare for input from a stdio stream.
+ * The caller must have already opened the stream,
+ * and is responsible for closing it after finishing decompression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
+{
+  my_src_ptr src;
+
+  /* The source object including the input buffer is made permanent so that
+   * a series of JPEG images can be read from the same file by calling
+   * jpeg_stdio_src only before the first one.  (If we discarded the buffer
+   * at the end of one image, we'd likely lose the start of the next one.)
+   * This makes it unsafe to use this manager and a different source
+   * manager serially with the same JPEG object.  Caveat programmer.
+   */
+  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_source_mgr));
+  }
+
+  src = (my_src_ptr) cinfo->src;
+  src->pub.init_source = init_source;
+  src->pub.fill_input_buffer = fill_input_buffer;
+  src->pub.skip_input_data = skip_input_data;
+  src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->pub.term_source = term_source;
+  src->infile = infile;
+  src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
+  src->pub.next_input_byte = NULL; /* until buffer loaded */
+}
+
+
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src (j_decompress_ptr cinfo,
+	      const unsigned char * inbuffer, size_t insize)
+{
+  struct jpeg_source_mgr * src;
+
+  if (inbuffer == NULL || insize == 0)	/* Treat empty input as fatal error */
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* The source object is made permanent so that a series of JPEG images
+   * can be read from the same buffer by calling jpeg_mem_src only before
+   * the first one.
+   */
+  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(struct jpeg_source_mgr));
+  }
+
+  src = cinfo->src;
+  src->init_source = init_mem_source;
+  src->fill_input_buffer = fill_mem_input_buffer;
+  src->skip_input_data = skip_input_data;
+  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->term_source = term_source;
+  src->bytes_in_buffer = insize;
+  src->next_input_byte = (const JOCTET *) inbuffer;
+}
diff --git a/cmake/externals/libjpeg/jdcoefct.c b/cmake/externals/libjpeg/jdcoefct.c
new file mode 100644
index 000000000..79ba42014
--- /dev/null
+++ b/cmake/externals/libjpeg/jdcoefct.c
@@ -0,0 +1,744 @@
+/*
+ * jdcoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2002-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for decompression.
+ * This controller is the top level of the JPEG decompressor proper.
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ * Also, the input side (only) is used when reading a file for transcoding.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Block smoothing is only applicable for progressive JPEG, so: */
+#ifndef D_PROGRESSIVE_SUPPORTED
+#undef BLOCK_SMOOTHING_SUPPORTED
+#endif
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* In single-pass modes, it's sufficient to buffer just one MCU.
+   * We append a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and let the entropy decoder write into that workspace each time.
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays; it is used only by the input side.
+   */
+  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+#endif
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  /* When doing block smoothing, we latch coefficient Al values here */
+  int * coef_bits_latch;
+#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
+#endif
+
+  /* Workspace for single-pass modes (omitted otherwise). */
+  JBLOCK blk_buffer[D_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+/* Forward declarations */
+METHODDEF(int) decompress_onepass
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) decompress_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
+METHODDEF(int) decompress_smooth_data
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_decompress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row (input side) */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->MCU_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->input_iMCU_row = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Initialize for an output processing pass.
+ */
+
+METHODDEF(void)
+start_output_pass (j_decompress_ptr cinfo)
+{
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* If multipass, check to see whether to use block smoothing on this pass */
+  if (coef->pub.coef_arrays != NULL) {
+    if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
+      coef->pub.decompress_data = decompress_smooth_data;
+    else
+      coef->pub.decompress_data = decompress_data;
+  }
+#endif
+  cinfo->output_iMCU_row = 0;
+}
+
+
+/*
+ * Decompress and return some data in the single-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int ci, xindex, yindex, yoffset, useful_width;
+  JBLOCKROW blkp;
+  JSAMPARRAY output_ptr;
+  JDIMENSION start_col, output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Loop to process as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      blkp = coef->blk_buffer;	/* pointer to current DCT block within MCU */
+      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
+      if (cinfo->lim_Se)	/* can bypass in DC only case */
+	MEMZERO(blkp, cinfo->blocks_in_MCU * SIZEOF(JBLOCK));
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+      /* Determine where data should go in output_buf and do the IDCT thing.
+       * We skip dummy blocks at the right and bottom edges (but blkp gets
+       * incremented past them!).
+       */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	/* Don't bother to IDCT an uninteresting component. */
+	if (! compptr->component_needed) {
+	  blkp += compptr->MCU_blocks;
+	  continue;
+	}
+	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+	output_ptr = output_buf[compptr->component_index] +
+	  yoffset * compptr->DCT_v_scaled_size;
+	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						    : compptr->last_col_width;
+	start_col = MCU_col_num * compptr->MCU_sample_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (cinfo->input_iMCU_row < last_iMCU_row ||
+	      yoffset + yindex < compptr->last_row_height) {
+	    output_col = start_col;
+	    for (xindex = 0; xindex < useful_width; xindex++) {
+	      (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) (blkp + xindex),
+			      output_ptr, output_col);
+	      output_col += compptr->DCT_h_scaled_size;
+	    }
+	    output_ptr += compptr->DCT_v_scaled_size;
+	  }
+	  blkp += compptr->MCU_width;
+	}
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  cinfo->output_iMCU_row++;
+  if (++(cinfo->input_iMCU_row) <= last_iMCU_row) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation.
+ */
+
+METHODDEF(int)
+dummy_consume_data (j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Consume input data and store it in the full-image coefficient buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor block rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ */
+
+METHODDEF(int)
+consume_data (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY blkp;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Note: entropy decoder expects buffer to be zeroed,
+     * but this is handled automatically by the memory manager
+     * because we requested a pre-zeroed array.
+     */
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkp = coef->MCU_buffer;	/* pointer to current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
+	  xindex = compptr->MCU_width;
+	  do {
+	    *blkp++ = buffer_ptr++;
+	  } while (--xindex);
+	}
+      }
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Decompress and return some data in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num;
+  int ci, block_row, block_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number < cinfo->output_scan_number ||
+	 (cinfo->input_scan_number == cinfo->output_scan_number &&
+	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       cinfo->output_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      output_col = 0;
+      for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
+			output_ptr, output_col);
+	buffer_ptr++;
+	output_col += compptr->DCT_h_scaled_size;
+      }
+      output_ptr += compptr->DCT_v_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) <= last_iMCU_row)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+
+/*
+ * This code applies interblock smoothing as described by section K.8
+ * of the JPEG standard: the first 5 AC coefficients are estimated from
+ * the DC values of a DCT block and its 8 neighboring blocks.
+ * We apply smoothing only for progressive JPEG decoding, and only if
+ * the coefficients it can estimate are not yet known to full precision.
+ */
+
+/* Natural-order array positions of the first 5 zigzag-order coefficients */
+#define Q01_POS  1
+#define Q10_POS  8
+#define Q20_POS  16
+#define Q11_POS  9
+#define Q02_POS  2
+
+/*
+ * Determine whether block smoothing is applicable and safe.
+ * We also latch the current states of the coef_bits[] entries for the
+ * AC coefficients; otherwise, if the input side of the decompressor
+ * advances into a new scan, we might think the coefficients are known
+ * more accurately than they really are.
+ */
+
+LOCAL(boolean)
+smoothing_ok (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  boolean smoothing_useful = FALSE;
+  int ci, coefi;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtable;
+  int * coef_bits;
+  int * coef_bits_latch;
+
+  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
+    return FALSE;
+
+  /* Allocate latch area if not already done */
+  if (coef->coef_bits_latch == NULL)
+    coef->coef_bits_latch = (int *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       cinfo->num_components * (SAVED_COEFS * SIZEOF(int)));
+  coef_bits_latch = coef->coef_bits_latch;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* All components' quantization values must already be latched. */
+    if ((qtable = compptr->quant_table) == NULL)
+      return FALSE;
+    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    if (qtable->quantval[0] == 0 ||
+	qtable->quantval[Q01_POS] == 0 ||
+	qtable->quantval[Q10_POS] == 0 ||
+	qtable->quantval[Q20_POS] == 0 ||
+	qtable->quantval[Q11_POS] == 0 ||
+	qtable->quantval[Q02_POS] == 0)
+      return FALSE;
+    /* DC values must be at least partly known for all components. */
+    coef_bits = cinfo->coef_bits[ci];
+    if (coef_bits[0] < 0)
+      return FALSE;
+    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
+    for (coefi = 1; coefi <= 5; coefi++) {
+      coef_bits_latch[coefi] = coef_bits[coefi];
+      if (coef_bits[coefi] != 0)
+	smoothing_useful = TRUE;
+    }
+    coef_bits_latch += SAVED_COEFS;
+  }
+
+  return smoothing_useful;
+}
+
+
+/*
+ * Variant of decompress_data for use when doing block smoothing.
+ */
+
+METHODDEF(int)
+decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num, last_block_column;
+  int ci, block_row, block_rows, access_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+  boolean first_row, last_row;
+  JBLOCK workspace;
+  int *coef_bits;
+  JQUANT_TBL *quanttbl;
+  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
+  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
+  int Al, pred;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if (cinfo->input_scan_number == cinfo->output_scan_number) {
+      /* If input is working on current scan, we ordinarily want it to
+       * have completed the current row.  But if input scan is DC,
+       * we want it to keep one row ahead so that next block row's DC
+       * values are up to date.
+       */
+      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
+	break;
+    }
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 2; /* this and next iMCU row */
+      last_row = FALSE;
+    } else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+      access_rows = block_rows; /* this iMCU row only */
+      last_row = TRUE;
+    }
+    /* Align the virtual buffer for this component. */
+    if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+	 (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+      first_row = FALSE;
+    } else {
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+      first_row = TRUE;
+    }
+    /* Fetch component-dependent info */
+    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+    quanttbl = compptr->quant_table;
+    Q00 = quanttbl->quantval[0];
+    Q01 = quanttbl->quantval[Q01_POS];
+    Q10 = quanttbl->quantval[Q10_POS];
+    Q20 = quanttbl->quantval[Q20_POS];
+    Q11 = quanttbl->quantval[Q11_POS];
+    Q02 = quanttbl->quantval[Q02_POS];
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      if (first_row && block_row == 0)
+	prev_block_row = buffer_ptr;
+      else
+	prev_block_row = buffer[block_row-1];
+      if (last_row && block_row == block_rows-1)
+	next_block_row = buffer_ptr;
+      else
+	next_block_row = buffer[block_row+1];
+      /* We fetch the surrounding DC values using a sliding-register approach.
+       * Initialize all nine here so as to do the right thing on narrow pics.
+       */
+      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
+      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
+      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
+      output_col = 0;
+      last_block_column = compptr->width_in_blocks - 1;
+      for (block_num = 0; block_num <= last_block_column; block_num++) {
+	/* Fetch current DCT block into workspace so we can modify it. */
+	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+	/* Update DC values */
+	if (block_num < last_block_column) {
+	  DC3 = (int) prev_block_row[1][0];
+	  DC6 = (int) buffer_ptr[1][0];
+	  DC9 = (int) next_block_row[1][0];
+	}
+	/* Compute coefficient estimates per K.8.
+	 * An estimate is applied only if coefficient is still zero,
+	 * and is not known to be fully accurate.
+	 */
+	/* AC01 */
+	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+	  num = 36 * Q00 * (DC4 - DC6);
+	  if (num >= 0) {
+	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[1] = (JCOEF) pred;
+	}
+	/* AC10 */
+	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+	  num = 36 * Q00 * (DC2 - DC8);
+	  if (num >= 0) {
+	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[8] = (JCOEF) pred;
+	}
+	/* AC20 */
+	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[16] = (JCOEF) pred;
+	}
+	/* AC11 */
+	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+	  if (num >= 0) {
+	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[9] = (JCOEF) pred;
+	}
+	/* AC02 */
+	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[2] = (JCOEF) pred;
+	}
+	/* OK, do the IDCT */
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+			output_ptr, output_col);
+	/* Advance for next column */
+	DC1 = DC2; DC2 = DC3;
+	DC4 = DC5; DC5 = DC6;
+	DC7 = DC8; DC8 = DC9;
+	buffer_ptr++, prev_block_row++, next_block_row++;
+	output_col += compptr->DCT_h_scaled_size;
+      }
+      output_ptr += compptr->DCT_v_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) <= last_iMCU_row)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* BLOCK_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    /* Note we ask for a pre-zeroed array. */
+    int ci, access_rows;
+    jpeg_component_info *compptr;
+
+    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       SIZEOF(my_coef_controller) - SIZEOF(coef->blk_buffer));
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+      /* If block smoothing could be used, need a bigger window */
+      if (cinfo->progressive_mode)
+	access_rows *= 3;
+#endif
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) access_rows);
+    }
+    coef->pub.consume_data = consume_data;
+    coef->pub.decompress_data = decompress_data;
+    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKARRAY blkp;
+    JBLOCKROW buffer_ptr;
+    int bi;
+
+    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
+    buffer_ptr = coef->blk_buffer;
+    if (cinfo->lim_Se == 0)	/* DC only case: want to bypass later */
+      MEMZERO(buffer_ptr, SIZEOF(coef->blk_buffer));
+    blkp = coef->MCU_buffer;
+    bi = D_MAX_BLOCKS_IN_MCU;
+    do {
+      *blkp++ = buffer_ptr++;
+    } while (--bi);
+    coef->pub.consume_data = dummy_consume_data;
+    coef->pub.decompress_data = decompress_onepass;
+    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+  }
+
+  coef->pub.start_input_pass = start_input_pass;
+  coef->pub.start_output_pass = start_output_pass;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  coef->coef_bits_latch = NULL;
+#endif
+  cinfo->coef = &coef->pub;
+}
diff --git a/cmake/externals/libjpeg/jdcolor.c b/cmake/externals/libjpeg/jdcolor.c
new file mode 100644
index 000000000..6b40fb534
--- /dev/null
+++ b/cmake/externals/libjpeg/jdcolor.c
@@ -0,0 +1,769 @@
+/*
+ * jdcolor.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2011-2023 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+#if RANGE_BITS < 2
+  /* Deliberate syntax err */
+  Sorry, this code requires 2 or more range extension bits.
+#endif
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_deconverter pub; /* public fields */
+
+  /* Private state for YCbCr->RGB and BG_YCC->RGB conversion */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+
+  /* Private state for RGB->Y conversion */
+  INT32 * R_y_tab;		/* => table for R to Y conversion */
+  INT32 * G_y_tab;		/* => table for G to Y conversion */
+  INT32 * B_y_tab;		/* => table for B to Y conversion */
+} my_color_deconverter;
+
+typedef my_color_deconverter * my_cconvert_ptr;
+
+
+/***************  YCbCr -> RGB conversion: most common case **************/
+/*************** BG_YCC -> RGB conversion: less common case **************/
+/***************    RGB -> Y   conversion: less common case **************/
+
+/*
+ * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
+ * previously known as Recommendation CCIR 601-1, except that Cb and Cr
+ * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
+ * sYCC (standard luma-chroma-chroma color space with extended gamut)
+ * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
+ * bg-sRGB and bg-sYCC (big gamut standard color spaces)
+ * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
+ * Note that the derived conversion coefficients given in some of these
+ * documents are imprecise.  The general conversion equations are
+ *
+ *	R = Y + K * (1 - Kr) * Cr
+ *	G = Y - K * (Kb * (1 - Kb) * Cb + Kr * (1 - Kr) * Cr) / (1 - Kr - Kb)
+ *	B = Y + K * (1 - Kb) * Cb
+ *
+ *	Y = Kr * R + (1 - Kr - Kb) * G + Kb * B
+ *
+ * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
+ * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
+ * the conversion equations to be implemented are therefore
+ *
+ *	R = Y + 1.402 * Cr
+ *	G = Y - 0.344136286 * Cb - 0.714136286 * Cr
+ *	B = Y + 1.772 * Cb
+ *
+ *	Y = 0.299 * R + 0.587 * G + 0.114 * B
+ *
+ * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
+ * For bg-sYCC, with K = 4, the equations are
+ *
+ *	R = Y + 2.804 * Cr
+ *	G = Y - 0.688272572 * Cb - 1.428272572 * Cr
+ *	B = Y + 3.544 * Cb
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ * Notice that Y, being an integral input, does not contribute any fraction
+ * so it need not participate in the rounding.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times Cb and Cr for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 9-bit to 12-bit samples it is still acceptable.  It's not very
+ * reasonable for 16-bit samples, but if you want lossless storage
+ * you shouldn't be changing colorspace anyway.
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance;
+ * the values for the G calculation are left scaled up,
+ * since we must add them together before rounding.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+
+/*
+ * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Normal case, sYCC */
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.402 * x */
+    cconvert->Cr_r_tab[i] = (int) DESCALE(FIX(1.402) * x, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.772 * x */
+    cconvert->Cb_b_tab[i] = (int) DESCALE(FIX(1.772) * x, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.714136286 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
+    /* Cb=>G value is scaled-up -0.344136286 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    cconvert->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
+  }
+}
+
+
+LOCAL(void)
+build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Wide gamut case, bg-sYCC */
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 2.804 * x */
+    cconvert->Cr_r_tab[i] = (int) DESCALE(FIX(2.804) * x, SCALEBITS);
+    /* Cb=>B value is nearest int to 3.544 * x */
+    cconvert->Cb_b_tab[i] = (int) DESCALE(FIX(3.544) * x, SCALEBITS);
+    /* Cr=>G value is scaled-up -1.428272572 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
+    /* Cb=>G value is scaled-up -0.688272572 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    cconvert->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ *
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+ycc_rgb_convert (j_decompress_ptr cinfo,
+		 JSAMPIMAGE input_buf, JDIMENSION input_row,
+		 JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses,
+       * for extended gamut (sYCC) and wide gamut (bg-sYCC) encodings.
+       */
+      outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS))];
+      outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/**************** Cases other than YCC -> RGB ****************/
+
+
+/*
+ * Initialize for RGB->grayscale colorspace conversion.
+ */
+
+LOCAL(void)
+build_rgb_y_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 i;
+
+  cconvert->R_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->G_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->B_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0; i <= MAXJSAMPLE; i++) {
+    cconvert->R_y_tab[i] = FIX(0.299) * i;
+    cconvert->G_y_tab[i] = FIX(0.587) * i;
+    cconvert->B_y_tab[i] = FIX(0.114) * i + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert RGB to grayscale.
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 y;
+  register INT32 * Rytab = cconvert->R_y_tab;
+  register INT32 * Gytab = cconvert->G_y_tab;
+  register INT32 * Bytab = cconvert->B_y_tab;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = Rytab[GETJSAMPLE(inptr0[col])];
+      y += Gytab[GETJSAMPLE(inptr1[col])];
+      y += Bytab[GETJSAMPLE(inptr2[col])];
+      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ * [R-G,G,B-G] to [R,G,B] conversion with modulo calculation
+ * (inverse color transform).
+ * This can be seen as an adaption of the general YCbCr->RGB
+ * conversion equation with Kr = Kb = 0, while replacing the
+ * normalization by modulo calculation.
+ */
+
+METHODDEF(void)
+rgb1_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register int r, g, b;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr0[col]);
+      g = GETJSAMPLE(inptr1[col]);
+      b = GETJSAMPLE(inptr2[col]);
+      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      outptr[RGB_RED]   = (JSAMPLE) ((r + g - CENTERJSAMPLE) & MAXJSAMPLE);
+      outptr[RGB_GREEN] = (JSAMPLE) g;
+      outptr[RGB_BLUE]  = (JSAMPLE) ((b + g - CENTERJSAMPLE) & MAXJSAMPLE);
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * [R-G,G,B-G] to grayscale conversion with modulo calculation
+ * (inverse color transform).
+ */
+
+METHODDEF(void)
+rgb1_gray_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 y;
+  register INT32 * Rytab = cconvert->R_y_tab;
+  register INT32 * Gytab = cconvert->G_y_tab;
+  register INT32 * Bytab = cconvert->B_y_tab;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr0[col]);
+      g = GETJSAMPLE(inptr1[col]);
+      b = GETJSAMPLE(inptr2[col]);
+      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
+       * (modulo) operator is equivalent to the bitmask operator AND.
+       */
+      y  = Rytab[(r + g - CENTERJSAMPLE) & MAXJSAMPLE];
+      y += Gytab[g];
+      y += Bytab[(b + g - CENTERJSAMPLE) & MAXJSAMPLE];
+      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ * No colorspace change, but conversion from separate-planes
+ * to interleaved representation.
+ */
+
+METHODDEF(void)
+rgb_convert (j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION input_row,
+	     JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED]   = inptr0[col];
+      outptr[RGB_GREEN] = inptr1[col];
+      outptr[RGB_BLUE]  = inptr2[col];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Color conversion for no colorspace change: just copy the data,
+ * converting from separate-planes to interleaved representation.
+ * Note: Omit uninteresting components in output buffer.
+ */
+
+METHODDEF(void)
+null_convert (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION input_row,
+	      JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr;
+  register JDIMENSION count;
+  register int out_comps = cinfo->out_color_components;
+  JDIMENSION num_cols = cinfo->output_width;
+  JSAMPROW startptr;
+  int ci;
+  jpeg_component_info *compptr;
+
+  while (--num_rows >= 0) {
+    /* It seems fastest to make a separate pass for each component. */
+    startptr = *output_buf++;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (! compptr->component_needed)
+	continue;		/* skip uninteresting component */
+      inptr = input_buf[ci][input_row];
+      outptr = startptr++;
+      for (count = num_cols; count > 0; count--) {
+	*outptr = *inptr++;	/* don't need GETJSAMPLE() here */
+	outptr += out_comps;
+      }
+    }
+    input_row++;
+  }
+}
+
+
+/*
+ * Color conversion for grayscale: just copy the data.
+ * This also works for YCC -> grayscale conversion, in which
+ * we just copy the Y (luminance) component and ignore chrominance.
+ */
+
+METHODDEF(void)
+grayscale_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  jcopy_sample_rows(input_buf[0] + input_row, output_buf,
+		    num_rows, cinfo->output_width);
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+METHODDEF(void)
+gray_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW outptr;
+  register JSAMPROW inptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ * This version handles Adobe-style YCCK->CMYK conversion,
+ * where we convert YCbCr to R=1-C, G=1-M, and B=1-Y using the
+ * same conversion as above, while passing K (black) unchanged.
+ * We assume build_ycc_rgb_table has been called.
+ */
+
+METHODDEF(void)
+ycck_cmyk_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    inptr3 = input_buf[3][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses,
+       * and for extended gamut encodings (sYCC).
+       */
+      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
+      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS)))];
+      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
+      /* K passes through unchanged */
+      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr += 4;
+    }
+  }
+}
+
+
+/*
+ * Convert CMYK to YK part of YCCK for colorless output.
+ * We assume build_rgb_y_table has been called.
+ */
+
+METHODDEF(void)
+cmyk_yk_convert (j_decompress_ptr cinfo,
+		 JSAMPIMAGE input_buf, JDIMENSION input_row,
+		 JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 y;
+  register INT32 * Rytab = cconvert->R_y_tab;
+  register INT32 * Gytab = cconvert->G_y_tab;
+  register INT32 * Bytab = cconvert->B_y_tab;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    inptr3 = input_buf[3][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = Rytab[MAXJSAMPLE - GETJSAMPLE(inptr0[col])];
+      y += Gytab[MAXJSAMPLE - GETJSAMPLE(inptr1[col])];
+      y += Bytab[MAXJSAMPLE - GETJSAMPLE(inptr2[col])];
+      outptr[0] = (JSAMPLE) (y >> SCALEBITS);
+      /* K passes through unchanged */
+      outptr[1] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr += 2;
+    }
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+start_pass_dcolor (j_decompress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for output colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_deconverter (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+  int ci, i;
+
+  cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_color_deconverter));
+  cinfo->cconvert = &cconvert->pub;
+  cconvert->pub.start_pass = start_pass_dcolor;
+
+  /* Make sure num_components agrees with jpeg_color_space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_YCbCr:
+  case JCS_BG_RGB:
+  case JCS_BG_YCC:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->num_components < 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+  }
+
+  /* Support color transform only for RGB colorspaces */
+  if (cinfo->color_transform &&
+      cinfo->jpeg_color_space != JCS_RGB &&
+      cinfo->jpeg_color_space != JCS_BG_RGB)
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+
+  /* Set out_color_components and conversion method based on requested space.
+   * Also adjust the component_needed flags for any unused components,
+   * so that earlier pipeline stages can avoid useless computation.
+   */
+
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_BG_YCC:
+      cconvert->pub.color_convert = grayscale_convert;
+      /* For color->grayscale conversion, only the Y (0) component is needed */
+      for (ci = 1; ci < cinfo->num_components; ci++)
+	cinfo->comp_info[ci].component_needed = FALSE;
+      break;
+    case JCS_RGB:
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_gray_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_gray_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+      }
+      build_rgb_y_table(cinfo);
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+      cconvert->pub.color_convert = gray_rgb_convert;
+      break;
+    case JCS_YCbCr:
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_ycc_rgb_table(cinfo);
+      break;
+    case JCS_BG_YCC:
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_bg_ycc_rgb_table(cinfo);
+      break;
+    case JCS_RGB:
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_convert;
+	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_rgb_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+      }
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_BG_RGB:
+    if (cinfo->jpeg_color_space != JCS_BG_RGB)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    switch (cinfo->color_transform) {
+    case JCT_NONE:
+      cconvert->pub.color_convert = rgb_convert;
+      break;
+    case JCT_SUBTRACT_GREEN:
+      cconvert->pub.color_convert = rgb1_rgb_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_CMYK:
+    if (cinfo->jpeg_color_space != JCS_YCCK)
+      goto def_label;
+    cinfo->out_color_components = 4;
+    cconvert->pub.color_convert = ycck_cmyk_convert;
+    build_ycc_rgb_table(cinfo);
+    break;
+
+  case JCS_YCCK:
+    if (cinfo->jpeg_color_space != JCS_CMYK ||
+	/* Support only YK part of YCCK for colorless output */
+	! cinfo->comp_info[0].component_needed ||
+	  cinfo->comp_info[1].component_needed ||
+	  cinfo->comp_info[2].component_needed ||
+	! cinfo->comp_info[3].component_needed)
+      goto def_label;
+    cinfo->out_color_components = 2;
+    /* Need all components on input side */
+    cinfo->comp_info[1].component_needed = TRUE;
+    cinfo->comp_info[2].component_needed = TRUE;
+    cconvert->pub.color_convert = cmyk_yk_convert;
+    build_rgb_y_table(cinfo);
+    break;
+
+  default: def_label:	/* permit null conversion to same output space */
+    if (cinfo->out_color_space != cinfo->jpeg_color_space)
+      /* unsupported non-null conversion */
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    i = 0;
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      if (cinfo->comp_info[ci].component_needed)
+	i++;		/* count output color components */
+    cinfo->out_color_components = i;
+    cconvert->pub.color_convert = null_convert;
+  }
+
+  if (cinfo->quantize_colors)
+    cinfo->output_components = 1; /* single colormapped output component */
+  else
+    cinfo->output_components = cinfo->out_color_components;
+}
diff --git a/cmake/externals/libjpeg/jdct.h b/cmake/externals/libjpeg/jdct.h
new file mode 100644
index 000000000..0f251590c
--- /dev/null
+++ b/cmake/externals/libjpeg/jdct.h
@@ -0,0 +1,409 @@
+/*
+ * jdct.h
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2023 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file contains common declarations for the forward and
+ * inverse DCT modules.  These declarations are private to the DCT managers
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
+ * The individual DCT algorithms are kept in separate files to ease 
+ * machine-dependent tuning (e.g., assembly coding).
+ */
+
+
+/*
+ * A forward DCT routine is given a pointer to an input sample array and
+ * a pointer to a work area of type DCTELEM[]; the DCT is to be performed
+ * in-place in that buffer.  Type DCTELEM is int for 8-bit samples, INT32
+ * for 12-bit samples.  (NOTE: Floating-point DCT implementations use an
+ * array of type FAST_FLOAT, instead.)
+ * The input data is to be fetched from the sample array starting at a
+ * specified column.  (Any row offset needed will be applied to the array
+ * pointer before it is passed to the FDCT code.)
+ * Note that the number of samples fetched by the FDCT routine is
+ * DCT_h_scaled_size * DCT_v_scaled_size.
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
+ * convention improves accuracy in integer implementations and saves some
+ * work in floating-point ones.
+ * Quantization of the output coefficients is done by jcdctmgr.c.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef int DCTELEM;		/* 16 or 32 bits is fine */
+#else
+typedef INT32 DCTELEM;		/* must have 32 bits */
+#endif
+
+typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data,
+					       JSAMPARRAY sample_data,
+					       JDIMENSION start_col));
+typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data,
+					     JSAMPARRAY sample_data,
+					     JDIMENSION start_col));
+
+
+/*
+ * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
+ * to an output sample array.  The routine must dequantize the input data as
+ * well as perform the IDCT; for dequantization, it uses the multiplier table
+ * pointed to by compptr->dct_table.  The output data is to be placed into the
+ * sample array starting at a specified column.  (Any row offset needed will
+ * be applied to the array pointer before it is passed to the IDCT code.)
+ * Note that the number of samples emitted by the IDCT routine is
+ * DCT_h_scaled_size * DCT_v_scaled_size.
+ */
+
+/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
+
+/*
+ * Each IDCT routine has its own ideas about the best dct_table element type.
+ */
+
+typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
+#if BITS_IN_JSAMPLE == 8
+typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
+#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
+#else
+typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
+#endif
+typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
+
+
+/*
+ * Each IDCT routine is responsible for range-limiting its results and
+ * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+ * be quite far out of range if the input data is corrupt, so a bulletproof
+ * range-limiting step is required.  We use a mask-and-table-lookup method
+ * to do the combined operations quickly, assuming that RANGE_CENTER
+ * (defined in jpegint.h) is a power of 2.  See the comments with
+ * prepare_range_limit_table (in jdmaster.c) for more info.
+ */
+
+#define RANGE_MASK  (RANGE_CENTER * 2 - 1)
+#define RANGE_SUBSET  (RANGE_CENTER - CENTERJSAMPLE)
+
+#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit - RANGE_SUBSET)
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_fdct_islow		jFDislow
+#define jpeg_fdct_ifast		jFDifast
+#define jpeg_fdct_float		jFDfloat
+#define jpeg_fdct_7x7		jFD7x7
+#define jpeg_fdct_6x6		jFD6x6
+#define jpeg_fdct_5x5		jFD5x5
+#define jpeg_fdct_4x4		jFD4x4
+#define jpeg_fdct_3x3		jFD3x3
+#define jpeg_fdct_2x2		jFD2x2
+#define jpeg_fdct_1x1		jFD1x1
+#define jpeg_fdct_9x9		jFD9x9
+#define jpeg_fdct_10x10		jFD10x10
+#define jpeg_fdct_11x11		jFD11x11
+#define jpeg_fdct_12x12		jFD12x12
+#define jpeg_fdct_13x13		jFD13x13
+#define jpeg_fdct_14x14		jFD14x14
+#define jpeg_fdct_15x15		jFD15x15
+#define jpeg_fdct_16x16		jFD16x16
+#define jpeg_fdct_16x8		jFD16x8
+#define jpeg_fdct_14x7		jFD14x7
+#define jpeg_fdct_12x6		jFD12x6
+#define jpeg_fdct_10x5		jFD10x5
+#define jpeg_fdct_8x4		jFD8x4
+#define jpeg_fdct_6x3		jFD6x3
+#define jpeg_fdct_4x2		jFD4x2
+#define jpeg_fdct_2x1		jFD2x1
+#define jpeg_fdct_8x16		jFD8x16
+#define jpeg_fdct_7x14		jFD7x14
+#define jpeg_fdct_6x12		jFD6x12
+#define jpeg_fdct_5x10		jFD5x10
+#define jpeg_fdct_4x8		jFD4x8
+#define jpeg_fdct_3x6		jFD3x6
+#define jpeg_fdct_2x4		jFD2x4
+#define jpeg_fdct_1x2		jFD1x2
+#define jpeg_idct_islow		jRDislow
+#define jpeg_idct_ifast		jRDifast
+#define jpeg_idct_float		jRDfloat
+#define jpeg_idct_7x7		jRD7x7
+#define jpeg_idct_6x6		jRD6x6
+#define jpeg_idct_5x5		jRD5x5
+#define jpeg_idct_4x4		jRD4x4
+#define jpeg_idct_3x3		jRD3x3
+#define jpeg_idct_2x2		jRD2x2
+#define jpeg_idct_1x1		jRD1x1
+#define jpeg_idct_9x9		jRD9x9
+#define jpeg_idct_10x10		jRD10x10
+#define jpeg_idct_11x11		jRD11x11
+#define jpeg_idct_12x12		jRD12x12
+#define jpeg_idct_13x13		jRD13x13
+#define jpeg_idct_14x14		jRD14x14
+#define jpeg_idct_15x15		jRD15x15
+#define jpeg_idct_16x16		jRD16x16
+#define jpeg_idct_16x8		jRD16x8
+#define jpeg_idct_14x7		jRD14x7
+#define jpeg_idct_12x6		jRD12x6
+#define jpeg_idct_10x5		jRD10x5
+#define jpeg_idct_8x4		jRD8x4
+#define jpeg_idct_6x3		jRD6x3
+#define jpeg_idct_4x2		jRD4x2
+#define jpeg_idct_2x1		jRD2x1
+#define jpeg_idct_8x16		jRD8x16
+#define jpeg_idct_7x14		jRD7x14
+#define jpeg_idct_6x12		jRD6x12
+#define jpeg_idct_5x10		jRD5x10
+#define jpeg_idct_4x8		jRD4x8
+#define jpeg_idct_3x6		jRD3x6
+#define jpeg_idct_2x4		jRD2x4
+#define jpeg_idct_1x2		jRD1x2
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Extern declarations for the forward and inverse DCT routines. */
+
+EXTERN(void) jpeg_fdct_islow
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_ifast
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_float
+    JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x7
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x5
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x3
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x1
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_9x9
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x10
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_11x11
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x12
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_13x13
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x14
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_15x15
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x16
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_16x8
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_14x7
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_12x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_10x5
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x3
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x1
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_8x16
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_7x14
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_6x12
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_5x10
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_4x8
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_3x6
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_2x4
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+EXTERN(void) jpeg_fdct_1x2
+    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
+
+EXTERN(void) jpeg_idct_islow
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x7
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x5
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x3
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_9x9
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x10
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_11x11
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x12
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_13x13
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x14
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_15x15
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x16
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_16x8
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_14x7
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_12x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_10x5
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x3
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_8x16
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_7x14
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_6x12
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_5x10
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x8
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_3x6
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type INT32.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+
+#define ONE	((INT32) 1)
+#define CONST_SCALE (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+
+#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * This macro is used only when the two inputs will actually be no more than
+ * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
+ * full 32x32 multiply.  This provides a useful speedup on many machines.
+ * Unfortunately there is no way to specify a 16x16->32 multiply portably
+ * in C, but some C compilers will do the right thing if you provide the
+ * correct combination of casts.
+ */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
+#endif
+#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#endif
+
+#ifndef MULTIPLY16C16		/* default definition */
+#define MULTIPLY16C16(var,const)  ((var) * (const))
+#endif
+
+/* Same except both inputs are variables. */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
+#endif
+
+#ifndef MULTIPLY16V16		/* default definition */
+#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
+#endif
+
+/* Like RIGHT_SHIFT, but applies to a DCTELEM.
+ * We assume that int right shift is unsigned if INT32 right shift is.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	DCTELEM ishift_temp;
+#if BITS_IN_JSAMPLE == 8
+#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#else
+#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#endif
+#define IRIGHT_SHIFT(x,shft)  \
+    ((ishift_temp = (x)) < 0 ? \
+     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
+     (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
diff --git a/cmake/externals/libjpeg/jddctmgr.c b/cmake/externals/libjpeg/jddctmgr.c
new file mode 100644
index 000000000..9ecfbb510
--- /dev/null
+++ b/cmake/externals/libjpeg/jddctmgr.c
@@ -0,0 +1,384 @@
+/*
+ * jddctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2013 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the inverse-DCT management logic.
+ * This code selects a particular IDCT implementation to be used,
+ * and it performs related housekeeping chores.  No code in this file
+ * is executed per IDCT step, only during output pass setup.
+ *
+ * Note that the IDCT routines are responsible for performing coefficient
+ * dequantization as well as the IDCT proper.  This module sets up the
+ * dequantization multiplier table needed by the IDCT routine.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/*
+ * The decompressor input side (jdinput.c) saves away the appropriate
+ * quantization table for each component at the start of the first scan
+ * involving that component.  (This is necessary in order to correctly
+ * decode files that reuse Q-table slots.)
+ * When we are ready to make an output pass, the saved Q-table is converted
+ * to a multiplier table that will actually be used by the IDCT routine.
+ * The multiplier table contents are IDCT-method-dependent.  To support
+ * application changes in IDCT method between scans, we can remake the
+ * multiplier tables if necessary.
+ * In buffered-image mode, the first output pass may occur before any data
+ * has been seen for some components, and thus before their Q-tables have
+ * been saved away.  To handle this case, multiplier tables are preset
+ * to zeroes; the result of the IDCT will be a neutral gray level.
+ */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_inverse_dct pub;	/* public fields */
+
+  /* This array contains the IDCT method code that each multiplier table
+   * is currently set up for, or -1 if it's not yet set up.
+   * The actual multiplier tables are pointed to by dct_table in the
+   * per-component comp_info structures.
+   */
+  int cur_method[MAX_COMPONENTS];
+} my_idct_controller;
+
+typedef my_idct_controller * my_idct_ptr;
+
+
+/* Allocated multiplier tables: big enough for any supported variant */
+
+typedef union {
+  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
+#ifdef DCT_IFAST_SUPPORTED
+  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  FLOAT_MULT_TYPE float_array[DCTSIZE2];
+#endif
+} multiplier_table;
+
+
+/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#ifdef DCT_ISLOW_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#else
+#ifdef IDCT_SCALING_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#endif
+#endif
+
+
+/*
+ * Prepare for an output pass.
+ * Here we select the proper IDCT routine for each component and build
+ * a matching multiplier table.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
+  int ci, i;
+  jpeg_component_info *compptr;
+  int method = 0;
+  inverse_DCT_method_ptr method_ptr = NULL;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper IDCT routine for this component's scaling */
+    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
+#ifdef IDCT_SCALING_SUPPORTED
+    case ((1 << 8) + 1):
+      method_ptr = jpeg_idct_1x1;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 2):
+      method_ptr = jpeg_idct_2x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((3 << 8) + 3):
+      method_ptr = jpeg_idct_3x3;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 4):
+      method_ptr = jpeg_idct_4x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((5 << 8) + 5):
+      method_ptr = jpeg_idct_5x5;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 6):
+      method_ptr = jpeg_idct_6x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((7 << 8) + 7):
+      method_ptr = jpeg_idct_7x7;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((9 << 8) + 9):
+      method_ptr = jpeg_idct_9x9;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((10 << 8) + 10):
+      method_ptr = jpeg_idct_10x10;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((11 << 8) + 11):
+      method_ptr = jpeg_idct_11x11;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((12 << 8) + 12):
+      method_ptr = jpeg_idct_12x12;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((13 << 8) + 13):
+      method_ptr = jpeg_idct_13x13;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((14 << 8) + 14):
+      method_ptr = jpeg_idct_14x14;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((15 << 8) + 15):
+      method_ptr = jpeg_idct_15x15;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((16 << 8) + 16):
+      method_ptr = jpeg_idct_16x16;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((16 << 8) + 8):
+      method_ptr = jpeg_idct_16x8;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((14 << 8) + 7):
+      method_ptr = jpeg_idct_14x7;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((12 << 8) + 6):
+      method_ptr = jpeg_idct_12x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((10 << 8) + 5):
+      method_ptr = jpeg_idct_10x5;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((8 << 8) + 4):
+      method_ptr = jpeg_idct_8x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 3):
+      method_ptr = jpeg_idct_6x3;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 2):
+      method_ptr = jpeg_idct_4x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 1):
+      method_ptr = jpeg_idct_2x1;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((8 << 8) + 16):
+      method_ptr = jpeg_idct_8x16;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((7 << 8) + 14):
+      method_ptr = jpeg_idct_7x14;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((6 << 8) + 12):
+      method_ptr = jpeg_idct_6x12;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((5 << 8) + 10):
+      method_ptr = jpeg_idct_5x10;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((4 << 8) + 8):
+      method_ptr = jpeg_idct_4x8;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((3 << 8) + 6):
+      method_ptr = jpeg_idct_3x6;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((2 << 8) + 4):
+      method_ptr = jpeg_idct_2x4;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+    case ((1 << 8) + 2):
+      method_ptr = jpeg_idct_1x2;
+      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
+      break;
+#endif
+    case ((DCTSIZE << 8) + DCTSIZE):
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	method_ptr = jpeg_idct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_ifast;
+	method = JDCT_IFAST;
+	break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	method_ptr = jpeg_idct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:
+	ERREXIT(cinfo, JERR_NOT_COMPILED);
+	break;
+      }
+      break;
+    default:
+      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
+	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
+      break;
+    }
+    idct->pub.inverse_DCT[ci] = method_ptr;
+    /* Create multiplier table from quant table.
+     * However, we can skip this if the component is uninteresting
+     * or if we already built the table.  Also, if no quant table
+     * has yet been saved for the component, we leave the
+     * multiplier table all-zero; we'll be reading zeroes from the
+     * coefficient controller's buffer anyway.
+     */
+    if (! compptr->component_needed || idct->cur_method[ci] == method)
+      continue;
+    qtbl = compptr->quant_table;
+    if (qtbl == NULL)		/* happens if no data yet for component */
+      continue;
+    idct->cur_method[ci] = method;
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      {
+	/* For LL&M IDCT method, multipliers are equal to raw quantization
+	 * coefficients, but are stored as ints to ensure access efficiency.
+	 */
+	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+	}
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * For integer operation, the multiplier table is to be scaled by
+	 * IFAST_SCALE_BITS.
+	 */
+	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+#define CONST_BITS 14
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ifmtbl[i] = (IFAST_MULT_TYPE)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-IFAST_SCALE_BITS);
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 1/8.
+	 */
+	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fmtbl[i] = (FLOAT_MULT_TYPE)
+	      ((double) qtbl->quantval[i] *
+	       aanscalefactor[row] * aanscalefactor[col] * 0.125);
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Initialize IDCT manager.
+ */
+
+GLOBAL(void)
+jinit_inverse_dct (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct;
+  int ci;
+  jpeg_component_info *compptr;
+
+  idct = (my_idct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_idct_controller));
+  cinfo->idct = &idct->pub;
+  idct->pub.start_pass = start_pass;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate and pre-zero a multiplier table for each component */
+    compptr->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(multiplier_table));
+    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
+    /* Mark multiplier table not yet set up for any method */
+    idct->cur_method[ci] = -1;
+  }
+}
diff --git a/cmake/externals/libjpeg/jdhuff.c b/cmake/externals/libjpeg/jdhuff.c
new file mode 100644
index 000000000..f175f0c32
--- /dev/null
+++ b/cmake/externals/libjpeg/jdhuff.c
@@ -0,0 +1,1559 @@
+/*
+ * jdhuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2006-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines.
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Derived data constructed for each Huffman table */
+
+#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
+
+typedef struct {
+  /* Basic tables: (element [0] of each array is unused) */
+  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
+  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
+   * the smallest code of length k; so given a code of length k, the
+   * corresponding symbol is huffval[code + valoffset[k]]
+   */
+
+  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
+  JHUFF_TBL *pub;
+
+  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+   * the input data stream.  If the next Huffman code is no more
+   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+   * the corresponding symbol directly from these tables.
+   */
+  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
+  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
+} d_derived_tbl;
+
+
+/*
+ * Fetching the next N bits from the input stream is a time-critical operation
+ * for the Huffman decoders.  We implement it with a combination of inline
+ * macros and out-of-line subroutines.  Note that N (the number of bits
+ * demanded at one time) never exceeds 15 for JPEG use.
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
+ * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
+ * as full as possible (not just to the number of bits needed; this
+ * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
+ * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
+ * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
+ * at least the requested number of bits --- dummy zeroes are inserted if
+ * necessary.
+ */
+
+typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32	/* size of buffer in bits */
+
+/* If long is > 32 bits on your machine, and shifting/masking longs is
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
+ * appropriately should be a win.  Unfortunately we can't define the size
+ * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
+ * because not all machines measure sizeof in 8-bit bytes.
+ */
+
+typedef struct {		/* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+} bitread_perm_state;
+
+typedef struct {		/* Bitreading working state within an MCU */
+  /* Current data source location */
+  /* We need a copy, rather than munging the original, in case of suspension */
+  const JOCTET * next_input_byte; /* => next byte to read from source */
+  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  /* Bit input buffer --- note these values are kept in register variables,
+   * not in this struct, inside the inner loops.
+   */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+  /* Pointer needed by jpeg_fill_bit_buffer. */
+  j_decompress_ptr cinfo;	/* back link to decompress master record */
+} bitread_working_state;
+
+/* Macros to declare and load/save bitread local variables. */
+#define BITREAD_STATE_VARS  \
+	register bit_buf_type get_buffer;  \
+	register int bits_left;  \
+	bitread_working_state br_state
+
+#define BITREAD_LOAD_STATE(cinfop,permstate)  \
+	br_state.cinfo = cinfop; \
+	br_state.next_input_byte = cinfop->src->next_input_byte; \
+	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+	get_buffer = permstate.get_buffer; \
+	bits_left = permstate.bits_left;
+
+#define BITREAD_SAVE_STATE(cinfop,permstate)  \
+	cinfop->src->next_input_byte = br_state.next_input_byte; \
+	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+	permstate.get_buffer = get_buffer; \
+	permstate.bits_left = bits_left
+
+/*
+ * These macros provide the in-line portion of bit fetching.
+ * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
+ * before using GET_BITS, PEEK_BITS, or DROP_BITS.
+ * The variables get_buffer and bits_left are assumed to be locals,
+ * but the state struct might not be (jpeg_huff_decode needs this).
+ *	CHECK_BIT_BUFFER(state,n,action);
+ *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      val = GET_BITS(n);
+ *		Fetch next N bits.
+ *      val = PEEK_BITS(n);
+ *		Fetch next N bits without removing them from the buffer.
+ *	DROP_BITS(n);
+ *		Discard next N bits.
+ * The value N should be a simple variable, not an expression, because it
+ * is evaluated multiple times.
+ */
+
+#define CHECK_BIT_BUFFER(state,nbits,action) \
+	{ if (bits_left < (nbits)) {  \
+	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+	      { action; }  \
+	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+
+#define GET_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -= (nbits)))) & BIT_MASK(nbits))
+
+#define PEEK_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -  (nbits)))) & BIT_MASK(nbits))
+
+#define DROP_BITS(nbits) \
+	(bits_left -= (nbits))
+
+
+/*
+ * Code for extracting next Huffman-coded symbol from input bit stream.
+ * Again, this is time-critical and we make the main paths be macros.
+ *
+ * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
+ * without looping.  Usually, more than 95% of the Huffman codes will be 8
+ * or fewer bits long.  The few overlength codes are handled with a loop,
+ * which need not be inline code.
+ *
+ * Notes about the HUFF_DECODE macro:
+ * 1. Near the end of the data segment, we may fail to get enough bits
+ *    for a lookahead.  In that case, we do it the hard way.
+ * 2. If the lookahead table contains no entry, the next code must be
+ *    more than HUFF_LOOKAHEAD bits long.
+ * 3. jpeg_huff_decode returns -1 if forced to suspend.
+ */
+
+#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
+{ register int nb, look; \
+  if (bits_left < HUFF_LOOKAHEAD) { \
+    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+    if (bits_left < HUFF_LOOKAHEAD) { \
+      nb = 1; goto slowlabel; \
+    } \
+  } \
+  look = PEEK_BITS(HUFF_LOOKAHEAD); \
+  if ((nb = htbl->look_nbits[look]) != 0) { \
+    DROP_BITS(nb); \
+    result = htbl->look_sym[look]; \
+  } else { \
+    nb = HUFF_LOOKAHEAD+1; \
+slowlabel: \
+    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
+	{ failaction; } \
+    get_buffer = state.get_buffer; bits_left = state.bits_left; \
+  } \
+}
+
+
+/*
+ * Expanded entropy decoder object for Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
+  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).EOBRUN = (src).EOBRUN, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  boolean insufficient_data;	/* set TRUE after emitting warning */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Following two fields used only in progressive mode */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
+
+  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
+
+  /* Following fields used only in sequential mode */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcu: */
+
+  /* Pointers to derived tables to be used for each block within an MCU */
+  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  /* Whether we care about the DC and AC coefficient values for each block */
+  int coef_limit[D_MAX_BLOCKS_IN_MCU];
+} huff_entropy_decoder;
+
+typedef huff_entropy_decoder * huff_entropy_ptr;
+
+
+static const int jpeg_zigzag_order[8][8] = {
+  {  0,  1,  5,  6, 14, 15, 27, 28 },
+  {  2,  4,  7, 13, 16, 26, 29, 42 },
+  {  3,  8, 12, 17, 25, 30, 41, 43 },
+  {  9, 11, 18, 24, 31, 40, 44, 53 },
+  { 10, 19, 23, 32, 39, 45, 52, 54 },
+  { 20, 22, 33, 38, 46, 51, 55, 60 },
+  { 21, 34, 37, 47, 50, 56, 59, 61 },
+  { 35, 36, 48, 49, 57, 58, 62, 63 }
+};
+
+static const int jpeg_zigzag_order7[7][7] = {
+  {  0,  1,  5,  6, 14, 15, 27 },
+  {  2,  4,  7, 13, 16, 26, 28 },
+  {  3,  8, 12, 17, 25, 29, 38 },
+  {  9, 11, 18, 24, 30, 37, 39 },
+  { 10, 19, 23, 31, 36, 40, 45 },
+  { 20, 22, 32, 35, 41, 44, 46 },
+  { 21, 33, 34, 42, 43, 47, 48 }
+};
+
+static const int jpeg_zigzag_order6[6][6] = {
+  {  0,  1,  5,  6, 14, 15 },
+  {  2,  4,  7, 13, 16, 25 },
+  {  3,  8, 12, 17, 24, 26 },
+  {  9, 11, 18, 23, 27, 32 },
+  { 10, 19, 22, 28, 31, 33 },
+  { 20, 21, 29, 30, 34, 35 }
+};
+
+static const int jpeg_zigzag_order5[5][5] = {
+  {  0,  1,  5,  6, 14 },
+  {  2,  4,  7, 13, 15 },
+  {  3,  8, 12, 16, 21 },
+  {  9, 11, 17, 20, 22 },
+  { 10, 18, 19, 23, 24 }
+};
+
+static const int jpeg_zigzag_order4[4][4] = {
+  { 0,  1,  5,  6 },
+  { 2,  4,  7, 12 },
+  { 3,  8, 11, 13 },
+  { 9, 10, 14, 15 }
+};
+
+static const int jpeg_zigzag_order3[3][3] = {
+  { 0, 1, 5 },
+  { 2, 4, 6 },
+  { 3, 7, 8 }
+};
+
+static const int jpeg_zigzag_order2[2][2] = {
+  { 0, 1 },
+  { 2, 3 }
+};
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ */
+
+LOCAL(void)
+jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
+			 d_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  d_derived_tbl *dtbl;
+  int p, i, l, si, numsymbols;
+  int lookbits, ctr;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    htbl = jpeg_std_huff_table((j_common_ptr) cinfo, isDC, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (d_derived_tbl *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(d_derived_tbl));
+  dtbl = *pdtbl;
+  dtbl->pub = htbl;		/* fill in back link */
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;
+  numsymbols = p;
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+  
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    if (htbl->bits[l]) {
+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
+       * minus the minimum code of length l
+       */
+      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
+      p += htbl->bits[l];
+      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
+    } else {
+      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
+    }
+  }
+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
+
+  /* Compute lookahead tables to speed up decoding.
+   * First we set all the table entries to 0, indicating "too long";
+   * then we iterate through the Huffman codes that are short enough and
+   * fill in all the entries that correspond to bit sequences starting
+   * with that code.
+   */
+
+  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+
+  p = 0;
+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+      /* Generate left-justified code followed by all possible bit sequences */
+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
+      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
+	dtbl->look_nbits[lookbits] = l;
+	dtbl->look_sym[lookbits] = htbl->huffval[p];
+	lookbits++;
+      }
+    }
+  }
+
+  /* Validate symbols as being reasonable.
+   * For AC tables, we make no check, but accept all byte values 0..255.
+   * For DC tables, we require the symbols to be in range 0..15.
+   * (Tighter bounds could be applied depending on the data depth and mode,
+   * but this is sufficient to ensure safe decoding.)
+   */
+  if (isDC) {
+    for (i = 0; i < numsymbols; i++) {
+      int sym = htbl->huffval[i];
+      if (sym < 0 || sym > 15)
+	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    }
+  }
+}
+
+
+/*
+ * Out-of-line code for bit fetching.
+ * Note: current values of get_buffer and bits_left are passed as parameters,
+ * but are returned in the corresponding fields of the state struct.
+ *
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15	/* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE-7)
+#endif
+
+
+LOCAL(boolean)
+jpeg_fill_bit_buffer (bitread_working_state * state,
+		      register bit_buf_type get_buffer, register int bits_left,
+		      int nbits)
+/* Load up the bit buffer to a depth of at least nbits */
+{
+  /* Copy heavily used state fields into locals (hopefully registers) */
+  register const JOCTET * next_input_byte = state->next_input_byte;
+  register size_t bytes_in_buffer = state->bytes_in_buffer;
+  j_decompress_ptr cinfo = state->cinfo;
+
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  /* (It is assumed that no request will be for more than that many bits.) */
+  /* We fail to do so only if we hit a marker or are forced to suspend. */
+
+  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+    while (bits_left < MIN_GET_BITS) {
+      register int c;
+
+      /* Attempt to read a byte */
+      if (bytes_in_buffer == 0) {
+	if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	  return FALSE;
+	next_input_byte = cinfo->src->next_input_byte;
+	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+      }
+      bytes_in_buffer--;
+      c = GETJOCTET(*next_input_byte++);
+
+      /* If it's 0xFF, check and discard stuffed zero byte */
+      if (c == 0xFF) {
+	/* Loop here to discard any padding FF's on terminating marker,
+	 * so that we can save a valid unread_marker value.  NOTE: we will
+	 * accept multiple FF's followed by a 0 as meaning a single FF data
+	 * byte.  This data pattern is not valid according to the standard.
+	 */
+	do {
+	  if (bytes_in_buffer == 0) {
+	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	      return FALSE;
+	    next_input_byte = cinfo->src->next_input_byte;
+	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	  }
+	  bytes_in_buffer--;
+	  c = GETJOCTET(*next_input_byte++);
+	} while (c == 0xFF);
+
+	if (c == 0) {
+	  /* Found FF/00, which represents an FF data byte */
+	  c = 0xFF;
+	} else {
+	  /* Oops, it's actually a marker indicating end of compressed data.
+	   * Save the marker code for later use.
+	   * Fine point: it might appear that we should save the marker into
+	   * bitread working state, not straight into permanent state.  But
+	   * once we have hit a marker, we cannot need to suspend within the
+	   * current MCU, because we will read no more bytes from the data
+	   * source.  So it is OK to update permanent state right away.
+	   */
+	  cinfo->unread_marker = c;
+	  /* See if we need to insert some fake zero bits. */
+	  goto no_more_bytes;
+	}
+      }
+
+      /* OK, load c into get_buffer */
+      get_buffer = (get_buffer << 8) | c;
+      bits_left += 8;
+    } /* end while */
+  } else {
+  no_more_bytes:
+    /* We get here if we've read the marker that terminates the compressed
+     * data segment.  There should be enough bits in the buffer register
+     * to satisfy the request; if so, no problem.
+     */
+    if (nbits > bits_left) {
+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
+       * the data stream, so that we can produce some kind of image.
+       * We use a nonvolatile flag to ensure that only one warning message
+       * appears per data segment.
+       */
+      if (! ((huff_entropy_ptr) cinfo->entropy)->insufficient_data) {
+	WARNMS(cinfo, JWRN_HIT_MARKER);
+	((huff_entropy_ptr) cinfo->entropy)->insufficient_data = TRUE;
+      }
+      /* Fill the buffer with zero bits */
+      get_buffer <<= MIN_GET_BITS - bits_left;
+      bits_left = MIN_GET_BITS;
+    }
+  }
+
+  /* Unload the local registers */
+  state->next_input_byte = next_input_byte;
+  state->bytes_in_buffer = bytes_in_buffer;
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  return TRUE;
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and sub will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+
+#define BIT_MASK(nbits)   ((1<<(nbits))-1)
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x))
+
+#else
+
+#define BIT_MASK(nbits)   bmask[nbits]
+#define HUFF_EXTEND(x,s)  ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x))
+
+static const int bmask[16] =	/* bmask[n] is mask for n rightmost bits */
+  { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
+    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Out-of-line code for Huffman code decoding.
+ */
+
+LOCAL(int)
+jpeg_huff_decode (bitread_working_state * state,
+		  register bit_buf_type get_buffer, register int bits_left,
+		  d_derived_tbl * htbl, int min_bits)
+{
+  register int l = min_bits;
+  register INT32 code;
+
+  /* HUFF_DECODE has determined that the code is at least min_bits */
+  /* bits long, so fetch that many bits in one swoop. */
+
+  CHECK_BIT_BUFFER(*state, l, return -1);
+  code = GET_BITS(l);
+
+  /* Collect the rest of the Huffman code one bit at a time. */
+  /* This is per Figure F.16 in the JPEG spec. */
+
+  while (code > htbl->maxcode[l]) {
+    code <<= 1;
+    CHECK_BIT_BUFFER(*state, 1, return -1);
+    code |= GET_BITS(1);
+    l++;
+  }
+
+  /* Unload the local registers */
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  /* With garbage input we may reach the sentinel value l = 17. */
+
+  if (l > 16) {
+    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
+    return 0;			/* fake a zero as the safest result */
+  }
+
+  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci;
+
+  finish_pass_huff(cinfo);
+
+  /* Advance past the RSTn marker */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    entropy->saved.last_dc_val[ci] = 0;
+  /* Re-init EOB run count, too */
+  entropy->saved.EOBRUN = 0;
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+
+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
+   * against a marker.  In that case we will end up treating the next data
+   * segment as empty, and we can avoid producing bogus output pixels by
+   * leaving the flag set.
+   */
+  if (cinfo->unread_marker == 0)
+    entropy->insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Huffman MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * Huffman-compressed coefficients. 
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
+ *
+ * We return FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * spectral selection, since we'll just re-assign them on the next call.
+ * Successive approximation AC refinement has to be more careful, however.)
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int Al = cinfo->Al;
+  register int s, r;
+  int blkn, ci;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  savable_state state;
+  d_derived_tbl * tbl;
+  jpeg_component_info * compptr;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      block = MCU_data[blkn];
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      /* Convert DC difference to actual value, update last_dc_val */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */
+      (*block)[0] = (JCOEF) (s << Al);
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval if using restarts */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  int Se, Al;
+  const int * natural_order;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    /* Load up working state.
+     * We can avoid loading/saving bitread state if in an EOB run.
+     */
+    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+
+    if (EOBRUN)			/* if it's a band of zeroes... */
+      EOBRUN--;			/* ...process it now (we do nothing) */
+    else {
+      BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+      Se = cinfo->Se;
+      Al = cinfo->Al;
+      natural_order = cinfo->natural_order;
+      block = MCU_data[0];
+      tbl = entropy->ac_derived_tbl;
+
+      for (k = cinfo->Ss; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	  /* Scale and output coefficient in natural (dezigzagged) order */
+	  (*block)[natural_order[k]] = (JCOEF) (s << Al);
+	} else {
+	  if (r != 15) {	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {		/* EOBr, r > 0 */
+	      EOBRUN = 1 << r;
+	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	      EOBRUN--;		/* this band is processed at this moment */
+	    }
+	    break;		/* force end-of-band */
+	  }
+	  k += 15;		/* ZRL: skip 15 zeroes in band */
+	}
+      }
+
+      BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    }
+
+    /* Completed MCU, so update state */
+    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+  }
+
+  /* Account for restart interval if using restarts */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  JCOEF p1;
+  int blkn;
+  BITREAD_STATE_VARS;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* Not worth the cycles to check insufficient_data here,
+   * since we will not change the data anyway if we read zeroes.
+   */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* Encoded data is simply the next bit of the two's-complement DC value */
+    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
+    if (GET_BITS(1))
+      MCU_data[blkn][0][0] |= p1;
+    /* Note: since we use |=, repeating the assignment later is safe */
+  }
+
+  /* Completed MCU, so update state */
+  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+
+  /* Account for restart interval if using restarts */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  int Se;
+  JCOEF p1, m1;
+  const int * natural_order;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+  int num_newnz;
+  int newnz_pos[DCTSIZE2];
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, don't modify the MCU.
+   */
+  if (! entropy->insufficient_data) {
+
+    Se = cinfo->Se;
+    p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+    m1 = -p1;			/* -1 in the bit position being coded */
+    natural_order = cinfo->natural_order;
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    block = MCU_data[0];
+    tbl = entropy->ac_derived_tbl;
+
+    /* If we are forced to suspend, we must undo the assignments to any newly
+     * nonzero coefficients in the block, because otherwise we'd get confused
+     * next time about which coefficients were already nonzero.
+     * But we need not undo addition of bits to already-nonzero coefficients;
+     * instead, we can test the current bit to see if we already did it.
+     */
+    num_newnz = 0;
+
+    /* initialize coefficient loop counter to start of band */
+    k = cinfo->Ss;
+
+    if (EOBRUN == 0) {
+      do {
+	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  if (s != 1)		/* size of new coef should always be 1 */
+	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1))
+	    s = p1;		/* newly nonzero coef is positive */
+	  else
+	    s = m1;		/* newly nonzero coef is negative */
+	} else {
+	  if (r != 15) {
+	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {
+	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    break;		/* rest of block is handled by EOB logic */
+	  }
+	  /* note s = 0 for processing ZRL */
+	}
+	/* Advance over already-nonzero coefs and r still-zero coefs,
+	 * appending correction bits to the nonzeroes.  A correction bit is 1
+	 * if the absolute value of the coefficient must be increased.
+	 */
+	do {
+	  thiscoef = *block + natural_order[k];
+	  if (*thiscoef) {
+	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	    if (GET_BITS(1)) {
+	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+		if (*thiscoef >= 0)
+		  *thiscoef += p1;
+		else
+		  *thiscoef += m1;
+	      }
+	    }
+	  } else {
+	    if (--r < 0)
+	      break;		/* reached target zero coefficient */
+	  }
+	  k++;
+	} while (k <= Se);
+	if (s) {
+	  int pos = natural_order[k];
+	  /* Output newly nonzero coefficient */
+	  (*block)[pos] = (JCOEF) s;
+	  /* Remember its position in case we have to suspend */
+	  newnz_pos[num_newnz++] = pos;
+	}
+	k++;
+      } while (k <= Se);
+    }
+
+    if (EOBRUN) {
+      /* Scan any remaining coefficient positions after the end-of-band
+       * (the last newly nonzero coefficient, if any).  Append a correction
+       * bit to each already-nonzero coefficient.  A correction bit is 1
+       * if the absolute value of the coefficient must be increased.
+       */
+      do {
+	thiscoef = *block + natural_order[k];
+	if (*thiscoef) {
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1)) {
+	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+	      if (*thiscoef >= 0)
+		*thiscoef += p1;
+	      else
+		*thiscoef += m1;
+	    }
+	  }
+	}
+	k++;
+      } while (k <= Se);
+      /* Count one block completed in EOB run */
+      EOBRUN--;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
+  }
+
+  /* Account for restart interval if using restarts */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+
+undoit:
+  /* Re-zero any output coefficients that we made newly nonzero */
+  while (num_newnz)
+    (*block)[newnz_pos[--num_newnz]] = 0;
+
+  return FALSE;
+}
+
+
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients,
+ * partial blocks.
+ */
+
+METHODDEF(boolean)
+decode_mcu_sub (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  int Se, blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    natural_order = cinfo->natural_order;
+    Se = cinfo->lim_Se;
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * htbl;
+      register int s, k, r;
+      int coef_limit, ci;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      htbl = entropy->dc_cur_tbls[blkn];
+      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
+
+      htbl = entropy->ac_cur_tbls[blkn];
+      k = 1;
+      coef_limit = entropy->coef_limit[blkn];
+      if (coef_limit) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	}
+	ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient */
+	(*block)[0] = (JCOEF) s;
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (; k < coef_limit; k++) {
+	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
+
+	  r = s >> 4;
+	  s &= 15;
+
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in natural_order[] will save us
+	     * if k > Se, which could happen if the data is corrupted.
+	     */
+	    (*block)[natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      goto EndOfBlock;
+	    k += 15;
+	  }
+	}
+      } else {
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	}
+      }
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
+
+	r = s >> 4;
+	s &= 15;
+
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	} else {
+	  if (r != 15)
+	    break;
+	  k += 15;
+	}
+      }
+
+      EndOfBlock: ;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval if using restarts */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU's worth of Huffman-compressed coefficients,
+ * full-size blocks.
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;
+  savable_state state;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * htbl;
+      register int s, k, r;
+      int coef_limit, ci;
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      htbl = entropy->dc_cur_tbls[blkn];
+      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
+
+      htbl = entropy->ac_cur_tbls[blkn];
+      k = 1;
+      coef_limit = entropy->coef_limit[blkn];
+      if (coef_limit) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	}
+	ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient */
+	(*block)[0] = (JCOEF) s;
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (; k < coef_limit; k++) {
+	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
+
+	  r = s >> 4;
+	  s &= 15;
+
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in jpeg_natural_order[] will save us
+	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
+	     */
+	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      goto EndOfBlock;
+	    k += 15;
+	  }
+	}
+      } else {
+	if (s) {
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	}
+      }
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (; k < DCTSIZE2; k++) {
+	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
+
+	r = s >> 4;
+	s &= 15;
+
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  DROP_BITS(s);
+	} else {
+	  if (r != 15)
+	    break;
+	  k += 15;
+	}
+      }
+
+      EndOfBlock: ;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval if using restarts */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, blkn, tbl, i;
+  jpeg_component_info * compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)
+	goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
+	goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+	goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah-1 != cinfo->Al)
+	goto bad;
+    }
+    if (cinfo->Al > 13) {	/* need not check for < 0 */
+      /* Arguably the maximum Al value should be less than 13 for 8-bit
+       * precision, but the spec doesn't say so, and we try to be liberal
+       * about what we accept.  Note: large Al values could result in
+       * out-of-range DC coefficients during early scans, leading to bizarre
+       * displays due to overflows in the IDCT math.  But we won't crash.
+       */
+      bad:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+	if (cinfo->Ah != expected)
+	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+	coef_bit_ptr[coefi] = cinfo->Al;
+      }
+    }
+
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+	entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+	entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Make sure requested tables are present, and compute derived tables.
+       * We may build same derived table more than once, but it's not expensive.
+       */
+      if (cinfo->Ss == 0) {
+	if (cinfo->Ah == 0) {	/* DC refinement needs no table */
+	  tbl = compptr->dc_tbl_no;
+	  jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+				  & entropy->derived_tbls[tbl]);
+	}
+      } else {
+	tbl = compptr->ac_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->derived_tbls[tbl]);
+	/* remember the single active table */
+	entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+
+    /* Initialize private state variables */
+    entropy->saved.EOBRUN = 0;
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning because
+     * there are some baseline files out there with all zeroes in these bytes.
+     */
+    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
+	((cinfo->is_baseline || cinfo->Se < DCTSIZE2) &&
+	cinfo->Se != cinfo->lim_Se))
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+
+    /* Select MCU decoding routine */
+    /* We retain the hard-coded case for full-size blocks.
+     * This is not necessary, but it appears that this version is slightly
+     * more performant in the given implementation.
+     * With an improved implementation we would prefer a single optimized
+     * function.
+     */
+    if (cinfo->lim_Se != DCTSIZE2-1)
+      entropy->pub.decode_mcu = decode_mcu_sub;
+    else
+      entropy->pub.decode_mcu = decode_mcu;
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Compute derived values for Huffman tables */
+      /* We may do this more than once for a table, but it's not expensive */
+      tbl = compptr->dc_tbl_no;
+      jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+			      & entropy->dc_derived_tbls[tbl]);
+      if (cinfo->lim_Se) {	/* AC needs no table when not present */
+	tbl = compptr->ac_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+				& entropy->ac_derived_tbls[tbl]);
+      }
+      /* Initialize DC predictions to 0 */
+      entropy->saved.last_dc_val[ci] = 0;
+    }
+
+    /* Precalculate decoding info for each block in an MCU of this scan */
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      /* Precalculate which table to use for each block */
+      entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
+      entropy->ac_cur_tbls[blkn] =	/* AC needs no table when not present */
+	cinfo->lim_Se ? entropy->ac_derived_tbls[compptr->ac_tbl_no] : NULL;
+      /* Decide whether we really care about the coefficient values */
+      if (compptr->component_needed) {
+	ci = compptr->DCT_v_scaled_size;
+	i = compptr->DCT_h_scaled_size;
+	switch (cinfo->lim_Se) {
+	case (1*1-1):
+	  entropy->coef_limit[blkn] = 1;
+	  break;
+	case (2*2-1):
+	  if (ci <= 0 || ci > 2) ci = 2;
+	  if (i <= 0 || i > 2) i = 2;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order2[ci - 1][i - 1];
+	  break;
+	case (3*3-1):
+	  if (ci <= 0 || ci > 3) ci = 3;
+	  if (i <= 0 || i > 3) i = 3;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order3[ci - 1][i - 1];
+	  break;
+	case (4*4-1):
+	  if (ci <= 0 || ci > 4) ci = 4;
+	  if (i <= 0 || i > 4) i = 4;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order4[ci - 1][i - 1];
+	  break;
+	case (5*5-1):
+	  if (ci <= 0 || ci > 5) ci = 5;
+	  if (i <= 0 || i > 5) i = 5;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order5[ci - 1][i - 1];
+	  break;
+	case (6*6-1):
+	  if (ci <= 0 || ci > 6) ci = 6;
+	  if (i <= 0 || i > 6) i = 6;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order6[ci - 1][i - 1];
+	  break;
+	case (7*7-1):
+	  if (ci <= 0 || ci > 7) ci = 7;
+	  if (i <= 0 || i > 7) i = 7;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order7[ci - 1][i - 1];
+	  break;
+	default:
+	  if (ci <= 0 || ci > 8) ci = 8;
+	  if (i <= 0 || i > 8) i = 8;
+	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[ci - 1][i - 1];
+	}
+      } else {
+	entropy->coef_limit[blkn] = 0;
+      }
+    }
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->insufficient_data = FALSE;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(huff_entropy_decoder));
+  cinfo->entropy = &entropy->pub;
+  entropy->pub.start_pass = start_pass_huff_decoder;
+  entropy->pub.finish_pass = finish_pass_huff;
+
+  if (cinfo->progressive_mode) {
+    /* Create progression status table */
+    int *coef_bit_ptr, ci;
+    cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       cinfo->num_components * DCTSIZE2 * SIZEOF(int));
+    coef_bit_ptr = & cinfo->coef_bits[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      for (i = 0; i < DCTSIZE2; i++)
+	*coef_bit_ptr++ = -1;
+
+    /* Mark derived tables unallocated */
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->derived_tbls[i] = NULL;
+    }
+  } else {
+    /* Mark derived tables unallocated */
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+    }
+  }
+}
diff --git a/cmake/externals/libjpeg/jdinput.c b/cmake/externals/libjpeg/jdinput.c
new file mode 100644
index 000000000..29fbef90b
--- /dev/null
+++ b/cmake/externals/libjpeg/jdinput.c
@@ -0,0 +1,657 @@
+/*
+ * jdinput.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2002-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input control logic for the JPEG decompressor.
+ * These routines are concerned with controlling the decompressor's input
+ * processing (marker reading and coefficient decoding).  The actual input
+ * reading is done in jdmarker.c, jdhuff.c, and jdarith.c.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_input_controller pub; /* public fields */
+
+  int inheaders;		/* Nonzero until first SOS is reached */
+} my_input_controller;
+
+typedef my_input_controller * my_inputctl_ptr;
+
+
+/* Forward declarations */
+METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Routines to calculate various quantities related to the size of the image.
+ */
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_core_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for transcoding and full decompression.
+ */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Compute actual output image dimensions and DCT scaling choices. */
+  if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
+    /* Provide 1/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 1;
+    cinfo->min_DCT_v_scaled_size = 1;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
+    /* Provide 2/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 2;
+    cinfo->min_DCT_v_scaled_size = 2;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 3) {
+    /* Provide 3/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 3L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 3L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 3;
+    cinfo->min_DCT_v_scaled_size = 3;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
+    /* Provide 4/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 4;
+    cinfo->min_DCT_v_scaled_size = 4;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 5) {
+    /* Provide 5/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 5L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 5L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 5;
+    cinfo->min_DCT_v_scaled_size = 5;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 6) {
+    /* Provide 6/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 6L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 6L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 6;
+    cinfo->min_DCT_v_scaled_size = 6;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 7) {
+    /* Provide 7/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 7L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 7L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 7;
+    cinfo->min_DCT_v_scaled_size = 7;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
+    /* Provide 8/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 8;
+    cinfo->min_DCT_v_scaled_size = 8;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 9) {
+    /* Provide 9/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 9L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 9L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 9;
+    cinfo->min_DCT_v_scaled_size = 9;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 10) {
+    /* Provide 10/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 10L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 10L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 10;
+    cinfo->min_DCT_v_scaled_size = 10;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 11) {
+    /* Provide 11/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 11L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 11L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 11;
+    cinfo->min_DCT_v_scaled_size = 11;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 12) {
+    /* Provide 12/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 12L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 12L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 12;
+    cinfo->min_DCT_v_scaled_size = 12;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 13) {
+    /* Provide 13/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 13L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 13L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 13;
+    cinfo->min_DCT_v_scaled_size = 13;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 14) {
+    /* Provide 14/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 14L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 14L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 14;
+    cinfo->min_DCT_v_scaled_size = 14;
+  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 15) {
+    /* Provide 15/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 15L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 15L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 15;
+    cinfo->min_DCT_v_scaled_size = 15;
+  } else {
+    /* Provide 16/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 16L, (long) cinfo->block_size);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 16L, (long) cinfo->block_size);
+    cinfo->min_DCT_h_scaled_size = 16;
+    cinfo->min_DCT_v_scaled_size = 16;
+  }
+
+  /* Recompute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* initial_setup has already initialized DCT_scaled_size,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+}
+
+
+LOCAL(void)
+initial_setup (j_decompress_ptr cinfo)
+/* Called once, when first SOS marker is reached */
+{
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
+  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Derive block_size, natural_order, and lim_Se */
+  if (cinfo->is_baseline || (cinfo->progressive_mode &&
+      cinfo->comps_in_scan)) { /* no pseudo SOS marker */
+    cinfo->block_size = DCTSIZE;
+    cinfo->natural_order = jpeg_natural_order;
+    cinfo->lim_Se = DCTSIZE2-1;
+  } else
+    switch (cinfo->Se) {
+    case (1*1-1):
+      cinfo->block_size = 1;
+      cinfo->natural_order = jpeg_natural_order; /* not needed */
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (2*2-1):
+      cinfo->block_size = 2;
+      cinfo->natural_order = jpeg_natural_order2;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (3*3-1):
+      cinfo->block_size = 3;
+      cinfo->natural_order = jpeg_natural_order3;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (4*4-1):
+      cinfo->block_size = 4;
+      cinfo->natural_order = jpeg_natural_order4;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (5*5-1):
+      cinfo->block_size = 5;
+      cinfo->natural_order = jpeg_natural_order5;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (6*6-1):
+      cinfo->block_size = 6;
+      cinfo->natural_order = jpeg_natural_order6;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (7*7-1):
+      cinfo->block_size = 7;
+      cinfo->natural_order = jpeg_natural_order7;
+      cinfo->lim_Se = cinfo->Se;
+      break;
+    case (8*8-1):
+      cinfo->block_size = 8;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (9*9-1):
+      cinfo->block_size = 9;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (10*10-1):
+      cinfo->block_size = 10;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (11*11-1):
+      cinfo->block_size = 11;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (12*12-1):
+      cinfo->block_size = 12;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (13*13-1):
+      cinfo->block_size = 13;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (14*14-1):
+      cinfo->block_size = 14;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (15*15-1):
+      cinfo->block_size = 15;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    case (16*16-1):
+      cinfo->block_size = 16;
+      cinfo->natural_order = jpeg_natural_order;
+      cinfo->lim_Se = DCTSIZE2-1;
+      break;
+    default:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+
+  /* We initialize DCT_scaled_size and min_DCT_scaled_size to block_size.
+   * In the full decompressor,
+   * this will be overridden by jpeg_calc_output_dimensions in jdmaster.c;
+   * but in the transcoder,
+   * jpeg_calc_output_dimensions is not used, so we must do it here.
+   */
+  cinfo->min_DCT_h_scaled_size = cinfo->block_size;
+  cinfo->min_DCT_v_scaled_size = cinfo->block_size;
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->DCT_h_scaled_size = cinfo->block_size;
+    compptr->DCT_v_scaled_size = cinfo->block_size;
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+    /* downsampled_width and downsampled_height will also be overridden by
+     * jdmaster.c if we are doing full decompression.  The transcoder library
+     * doesn't use these values, but the calling application might.
+     */
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Mark component needed, until color conversion says otherwise */
+    compptr->component_needed = TRUE;
+    /* Mark no quantization table yet saved for component */
+    compptr->quant_table = NULL;
+  }
+
+  /* Compute number of fully interleaved MCU rows. */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+	          (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+
+  /* Decide whether file contains multiple scans */
+  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
+    cinfo->inputctl->has_multiple_scans = TRUE;
+  else
+    cinfo->inputctl->has_multiple_scans = FALSE;
+}
+
+
+LOCAL(void)
+per_scan_setup (j_decompress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+
+  if (cinfo->comps_in_scan == 1) {
+
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+
+  } else {
+
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
+
+    cinfo->blocks_in_MCU = 0;
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+
+  }
+}
+
+
+/*
+ * Save away a copy of the Q-table referenced by each component present
+ * in the current scan, unless already saved during a prior scan.
+ *
+ * In a multiple-scan JPEG file, the encoder could assign different components
+ * the same Q-table slot number, but change table definitions between scans
+ * so that each component uses a different Q-table.  (The IJG encoder is not
+ * currently capable of doing this, but other encoders might.)  Since we want
+ * to be able to dequantize all the components at the end of the file, this
+ * means that we have to save away the table actually used for each component.
+ * We do this by copying the table at the start of the first scan containing
+ * the component.
+ * The JPEG spec prohibits the encoder from changing the contents of a Q-table
+ * slot between scans of a component using that slot.  If the encoder does so
+ * anyway, this decoder will simply use the Q-table values that were current
+ * at the start of the first scan for the component.
+ *
+ * The decompressor output side looks only at the saved quant tables,
+ * not at the current Q-table slots.
+ */
+
+LOCAL(void)
+latch_quant_tables (j_decompress_ptr cinfo)
+{
+  int ci, qtblno;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* No work if we already saved Q-table for this component */
+    if (compptr->quant_table != NULL)
+      continue;
+    /* Make sure specified quantization table is present */
+    qtblno = compptr->quant_tbl_no;
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    /* OK, save away the quantization table */
+    qtbl = (JQUANT_TBL *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(JQUANT_TBL));
+    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
+    compptr->quant_table = qtbl;
+  }
+}
+
+
+/*
+ * Initialize the input modules to read a scan of compressed data.
+ * The first call to this is done by jdmaster.c after initializing
+ * the entire decompressor (during jpeg_start_decompress).
+ * Subsequent calls come from consume_markers, below.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  per_scan_setup(cinfo);
+  latch_quant_tables(cinfo);
+  (*cinfo->entropy->start_pass) (cinfo);
+  (*cinfo->coef->start_input_pass) (cinfo);
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
+}
+
+
+/*
+ * Finish up after inputting a compressed-data scan.
+ * This is called by the coefficient controller after it's read all
+ * the expected data of the scan.
+ */
+
+METHODDEF(void)
+finish_input_pass (j_decompress_ptr cinfo)
+{
+  (*cinfo->entropy->finish_pass) (cinfo);
+  cinfo->inputctl->consume_input = consume_markers;
+}
+
+
+/*
+ * Read JPEG markers before, between, or after compressed-data scans.
+ * Change state as necessary when a new scan is reached.
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * The consume_input method pointer points either here or to the
+ * coefficient controller's consume_data routine, depending on whether
+ * we are reading a compressed data segment or inter-segment markers.
+ *
+ * Note: This function should NOT return a pseudo SOS marker (with zero
+ * component number) to the caller.  A pseudo marker received by
+ * read_markers is processed and then skipped for other markers.
+ */
+
+METHODDEF(int)
+consume_markers (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+  int val;
+
+  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
+    return JPEG_REACHED_EOI;
+
+  for (;;) {			/* Loop to pass pseudo SOS marker */
+    val = (*cinfo->marker->read_markers) (cinfo);
+
+    switch (val) {
+    case JPEG_REACHED_SOS:	/* Found SOS */
+      if (inputctl->inheaders) { /* 1st SOS */
+	if (inputctl->inheaders == 1)
+	  initial_setup(cinfo);
+	if (cinfo->comps_in_scan == 0) { /* pseudo SOS marker */
+	  inputctl->inheaders = 2;
+	  break;
+	}
+	inputctl->inheaders = 0;
+	/* Note: start_input_pass must be called by jdmaster.c
+	 * before any more input can be consumed.  jdapimin.c is
+	 * responsible for enforcing this sequencing.
+	 */
+      } else {			/* 2nd or later SOS marker */
+	if (! inputctl->pub.has_multiple_scans)
+	  ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
+	if (cinfo->comps_in_scan == 0) /* unexpected pseudo SOS marker */
+	  break;
+	start_input_pass(cinfo);
+      }
+      return val;
+    case JPEG_REACHED_EOI:	/* Found EOI */
+      inputctl->pub.eoi_reached = TRUE;
+      if (inputctl->inheaders) { /* Tables-only datastream, apparently */
+	if (cinfo->marker->saw_SOF)
+	  ERREXIT(cinfo, JERR_SOF_NO_SOS);
+      } else {
+	/* Prevent infinite loop in coef ctlr's decompress_data routine
+	 * if user set output_scan_number larger than number of scans.
+	 */
+	if (cinfo->output_scan_number > cinfo->input_scan_number)
+	  cinfo->output_scan_number = cinfo->input_scan_number;
+      }
+      return val;
+    case JPEG_SUSPENDED:
+      return val;
+    default:
+      return val;
+    }
+  }
+}
+
+
+/*
+ * Reset state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
+
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = 1;
+  /* Reset other modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->marker->reset_marker_reader) (cinfo);
+  /* Reset progression state -- would be cleaner if entropy decoder did this */
+  cinfo->coef_bits = NULL;
+}
+
+
+/*
+ * Initialize the input controller module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl;
+
+  /* Create subobject in permanent pool */
+  inputctl = (my_inputctl_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_input_controller));
+  cinfo->inputctl = &inputctl->pub;
+  /* Initialize method pointers */
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.reset_input_controller = reset_input_controller;
+  inputctl->pub.start_input_pass = start_input_pass;
+  inputctl->pub.finish_input_pass = finish_input_pass;
+  /* Initialize state: can't use reset_input_controller since we don't
+   * want to try to reset other modules yet.
+   */
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = 1;
+}
diff --git a/cmake/externals/libjpeg/jdmainct.c b/cmake/externals/libjpeg/jdmainct.c
new file mode 100644
index 000000000..1cd66d853
--- /dev/null
+++ b/cmake/externals/libjpeg/jdmainct.c
@@ -0,0 +1,511 @@
+/*
+ * jdmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for decompression.
+ * The main buffer lies between the JPEG decompressor proper and the
+ * post-processor; it holds downsampled data in the JPEG colorspace.
+ *
+ * Note that this code is bypassed in raw-data mode, since the application
+ * supplies the equivalent of the main buffer in that case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * In the current system design, the main buffer need never be a full-image
+ * buffer; any full-height buffers will be found inside the coefficient or
+ * postprocessing controllers.  Nonetheless, the main controller is not
+ * trivial.  Its responsibility is to provide context rows for upsampling/
+ * rescaling, and doing this in an efficient fashion is a bit tricky.
+ *
+ * Postprocessor input data is counted in "row groups".  A row group is
+ * defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
+ * sample rows of each component.  (We require DCT_scaled_size values to be
+ * chosen such that these numbers are integers.  In practice DCT_scaled_size
+ * values will likely be powers of two, so we actually have the stronger
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
+ * row group (times any additional scale factor that the upsampler is
+ * applying).
+ *
+ * The coefficient controller will deliver data to us one iMCU row at a time;
+ * each iMCU row contains v_samp_factor * DCT_v_scaled_size sample rows, or
+ * exactly min_DCT_v_scaled_size row groups.  (This amount of data corresponds
+ * to one row of MCUs when the image is fully interleaved.)  Note that the
+ * number of sample rows varies across components, but the number of row
+ * groups does not.  Some garbage sample rows may be included in the last iMCU
+ * row at the bottom of the image.
+ *
+ * Depending on the vertical scaling algorithm used, the upsampler may need
+ * access to the sample row(s) above and below its current input row group.
+ * The upsampler is required to set need_context_rows TRUE at global selection
+ * time if so.  When need_context_rows is FALSE, this controller can simply
+ * obtain one iMCU row at a time from the coefficient controller and dole it
+ * out as row groups to the postprocessor.
+ *
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
+ * passed to postprocessing contains at least one row group's worth of samples
+ * above and below the row group(s) being processed.  Note that the context
+ * rows "above" the first passed row group appear at negative row offsets in
+ * the passed buffer.  At the top and bottom of the image, the required
+ * context rows are manufactured by duplicating the first or last real sample
+ * row; this avoids having special cases in the upsampling inner loops.
+ *
+ * The amount of context is fixed at one row group just because that's a
+ * convenient number for this controller to work with.  The existing
+ * upsamplers really only need one sample row of context.  An upsampler
+ * supporting arbitrary output rescaling might wish for more than one row
+ * group of context when shrinking the image; tough, we don't handle that.
+ * (This is justified by the assumption that downsizing will be handled mostly
+ * by adjusting the DCT_scaled_size values, so that the actual scale factor at
+ * the upsample step needn't be much less than one.)
+ *
+ * To provide the desired context, we have to retain the last two row groups
+ * of one iMCU row while reading in the next iMCU row.  (The last row group
+ * can't be processed until we have another row group for its below-context,
+ * and so we have to save the next-to-last group too for its above-context.)
+ * We could do this most simply by copying data around in our buffer, but
+ * that'd be very slow.  We can avoid copying any data by creating a rather
+ * strange pointer structure.  Here's how it works.  We allocate a workspace
+ * consisting of M+2 row groups (where M = min_DCT_v_scaled_size is the number
+ * of row groups per iMCU row).  We create two sets of redundant pointers to
+ * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
+ * pointer lists look like this:
+ *                   M+1                          M-1
+ * master pointer --> 0         master pointer --> 0
+ *                    1                            1
+ *                   ...                          ...
+ *                   M-3                          M-3
+ *                   M-2                           M
+ *                   M-1                          M+1
+ *                    M                           M-2
+ *                   M+1                          M-1
+ *                    0                            0
+ * We read alternate iMCU rows using each master pointer; thus the last two
+ * row groups of the previous iMCU row remain un-overwritten in the workspace.
+ * The pointer lists are set up so that the required context rows appear to
+ * be adjacent to the proper places when we pass the pointer lists to the
+ * upsampler.
+ *
+ * The above pictures describe the normal state of the pointer lists.
+ * At top and bottom of the image, we diddle the pointer lists to duplicate
+ * the first or last sample row as necessary (this is cheaper than copying
+ * sample rows around).
+ *
+ * This scheme breaks down if M < 2, ie, min_DCT_v_scaled_size is 1.  In that
+ * situation each iMCU row provides only one row group so the buffering logic
+ * must be different (eg, we must read two iMCU rows before we can emit the
+ * first row group).  For now, we simply do not support providing context
+ * rows when min_DCT_v_scaled_size is 1.  That combination seems unlikely to
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
+ * want it quick and dirty, so a context-free upsampler is sufficient.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_main_controller pub; /* public fields */
+
+  /* Pointer to allocated workspace (M or M+2 row groups). */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
+
+  /* Remaining fields are only used in the context case. */
+
+  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
+
+  /* These are the master pointers to the funny-order pointer lists. */
+  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+
+  int whichptr;			/* indicates which pointer set is now in use */
+  int context_state;		/* process_data state machine status */
+  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+/* context_state values: */
+#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+METHODDEF(void) process_data_context_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) process_data_crank_post
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#endif
+
+
+LOCAL(void)
+alloc_funny_pointers (j_decompress_ptr cinfo)
+/* Allocate space for the funny pointer lists.
+ * This is done only once, not once per pass.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  /* Get top-level space for component array pointers.
+   * We alloc both arrays with one call to save a few cycles.
+   */
+  mainp->xbuffer[0] = (JSAMPIMAGE) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
+  mainp->xbuffer[1] = mainp->xbuffer[0] + cinfo->num_components;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (! compptr->component_needed)
+      continue;			/* skip uninteresting component */
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    /* Get space for pointer lists --- M+4 row groups in each list.
+     * We alloc both pointer lists with one call to save a few cycles.
+     */
+    xbuf = (JSAMPARRAY) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
+      JPOOL_IMAGE, 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
+    xbuf += rgroup;		/* want one row group at negative offsets */
+    mainp->xbuffer[0][ci] = xbuf;
+    xbuf += rgroup * (M + 4);
+    mainp->xbuffer[1][ci] = xbuf;
+  }
+}
+
+
+LOCAL(void)
+make_funny_pointers (j_decompress_ptr cinfo)
+/* Create the funny pointer lists discussed in the comments above.
+ * The actual workspace is already allocated (in mainp->buffer),
+ * and the space for the pointer lists is allocated too.
+ * This routine just fills in the curiously ordered lists.
+ * This will be repeated at the beginning of each pass.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY buf, xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (! compptr->component_needed)
+      continue;			/* skip uninteresting component */
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    xbuf0 = mainp->xbuffer[0][ci];
+    xbuf1 = mainp->xbuffer[1][ci];
+    /* First copy the workspace pointers as-is */
+    buf = mainp->buffer[ci];
+    for (i = 0; i < rgroup * (M + 2); i++) {
+      xbuf0[i] = xbuf1[i] = buf[i];
+    }
+    /* In the second list, put the last four row groups in swapped order */
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
+      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
+    }
+    /* The wraparound pointers at top and bottom will be filled later
+     * (see set_wraparound_pointers, below).  Initially we want the "above"
+     * pointers to duplicate the first actual data line.  This only needs
+     * to happen in xbuffer[0].
+     */
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[0];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_wraparound_pointers (j_decompress_ptr cinfo)
+/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
+ * This changes the pointer list state from top-of-image to the normal state.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_v_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (! compptr->component_needed)
+      continue;			/* skip uninteresting component */
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    xbuf0 = mainp->xbuffer[0][ci];
+    xbuf1 = mainp->xbuffer[1][ci];
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
+      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
+      xbuf0[rgroup*(M+2) + i] = xbuf0[i];
+      xbuf1[rgroup*(M+2) + i] = xbuf1[i];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_bottom_pointers (j_decompress_ptr cinfo)
+/* Change the pointer lists to duplicate the last sample row at the bottom
+ * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup, iMCUheight, rows_left;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (! compptr->component_needed)
+      continue;			/* skip uninteresting component */
+    /* Count sample rows in one iMCU row and in one row group */
+    iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size;
+    rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size;
+    /* Count nondummy sample rows remaining for this component */
+    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
+    if (rows_left == 0) rows_left = iMCUheight;
+    /* Count nondummy row groups.  Should get same answer for each component,
+     * so we need only do it once.
+     */
+    if (ci == 0) {
+      mainp->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
+    }
+    /* Duplicate the last real sample row rgroup*2 times; this pads out the
+     * last partial rowgroup and ensures at least one full rowgroup of context.
+     */
+    xbuf = mainp->xbuffer[mainp->whichptr][ci];
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf[rows_left + i] = xbuf[rows_left-1];
+    }
+  }
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->upsample->need_context_rows) {
+      mainp->pub.process_data = process_data_context_main;
+      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
+      mainp->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
+      mainp->context_state = CTX_PREPARE_FOR_IMCU;
+      mainp->iMCU_row_ctr = 0;
+      mainp->buffer_full = FALSE; /* Mark buffer empty */
+    } else {
+      /* Simple case with no context needed */
+      mainp->pub.process_data = process_data_simple_main;
+      mainp->rowgroup_ctr = mainp->rowgroups_avail; /* Mark buffer empty */
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_CRANK_DEST:
+    /* For last pass of 2-pass quantization, just crank the postprocessor */
+    mainp->pub.process_data = process_data_crank_post;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the simple case where no context is required.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+			  JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (mainp->rowgroup_ctr >= mainp->rowgroups_avail) {
+    if (! (*cinfo->coef->decompress_data) (cinfo, mainp->buffer))
+      return;			/* suspension forced, can do nothing more */
+    mainp->rowgroup_ctr = 0;	/* OK, we have an iMCU row to work with */
+  }
+
+  /* Note: at the bottom of the image, we may pass extra garbage row groups
+   * to the postprocessor.  The postprocessor has to check for bottom
+   * of image anyway (at row resolution), so no point in us doing it too.
+   */
+
+  /* Feed the postprocessor */
+  (*cinfo->post->post_process_data) (cinfo, mainp->buffer,
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+}
+
+
+/*
+ * Process some data.
+ * This handles the case where context rows must be provided.
+ */
+
+METHODDEF(void)
+process_data_context_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+			   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! mainp->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo,
+					   mainp->xbuffer[mainp->whichptr]))
+      return;			/* suspension forced, can do nothing more */
+    mainp->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    mainp->iMCU_row_ctr++;	/* count rows received */
+  }
+
+  /* Postprocessor typically will not swallow all the input data it is handed
+   * in one call (due to filling the output buffer first).  Must be prepared
+   * to exit and restart.  This switch lets us keep track of how far we got.
+   * Note that each case falls through to the next on successful completion.
+   */
+  switch (mainp->context_state) {
+  case CTX_POSTPONED_ROW:
+    /* Call postprocessor using previously set pointers for postponed row */
+    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
+      return;			/* Need to suspend */
+    mainp->context_state = CTX_PREPARE_FOR_IMCU;
+    if (*out_row_ctr >= out_rows_avail)
+      return;			/* Postprocessor exactly filled output buf */
+    /*FALLTHROUGH*/
+  case CTX_PREPARE_FOR_IMCU:
+    /* Prepare to process first M-1 row groups of this iMCU row */
+    mainp->rowgroup_ctr = 0;
+    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1);
+    /* Check for bottom of image: if so, tweak pointers to "duplicate"
+     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
+     */
+    if (mainp->iMCU_row_ctr == cinfo->total_iMCU_rows)
+      set_bottom_pointers(cinfo);
+    mainp->context_state = CTX_PROCESS_IMCU;
+    /*FALLTHROUGH*/
+  case CTX_PROCESS_IMCU:
+    /* Call postprocessor using previously set pointers */
+    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
+			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
+      return;			/* Need to suspend */
+    /* After the first iMCU, change wraparound pointers to normal state */
+    if (mainp->iMCU_row_ctr == 1)
+      set_wraparound_pointers(cinfo);
+    /* Prepare to load new iMCU row using other xbuffer list */
+    mainp->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    mainp->buffer_full = FALSE;
+    /* Still need to process last row group of this iMCU row, */
+    /* which is saved at index M+1 of the other xbuffer */
+    mainp->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1);
+    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2);
+    mainp->context_state = CTX_POSTPONED_ROW;
+  }
+}
+
+
+/*
+ * Process some data.
+ * Final pass of two-pass quantization: just call the postprocessor.
+ * Source data will be the postprocessor controller's internal buffer.
+ */
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+METHODDEF(void)
+process_data_crank_post (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+			 JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
+			(JDIMENSION *) NULL, (JDIMENSION) 0,
+			output_buf, out_row_ctr, out_rows_avail);
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr mainp;
+  int ci, rgroup, ngroups;
+  jpeg_component_info *compptr;
+
+  mainp = (my_main_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_main_controller));
+  cinfo->main = &mainp->pub;
+  mainp->pub.start_pass = start_pass_main;
+
+  if (need_full_buffer)		/* shouldn't happen */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Allocate the workspace.
+   * ngroups is the number of row groups we need.
+   */
+  if (cinfo->upsample->need_context_rows) {
+    if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
+    ngroups = cinfo->min_DCT_v_scaled_size + 2;
+  } else {
+    /* There are always min_DCT_v_scaled_size row groups in an iMCU row. */
+    ngroups = cinfo->min_DCT_v_scaled_size;
+    mainp->rowgroups_avail = (JDIMENSION) ngroups;
+  }
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (! compptr->component_needed)
+      continue;			/* skip uninteresting component */
+    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
+    mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
+       (JDIMENSION) (rgroup * ngroups));
+  }
+}
diff --git a/cmake/externals/libjpeg/jdmarker.c b/cmake/externals/libjpeg/jdmarker.c
new file mode 100644
index 000000000..c10fde605
--- /dev/null
+++ b/cmake/externals/libjpeg/jdmarker.c
@@ -0,0 +1,1505 @@
+/*
+ * jdmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2009-2019 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to decode JPEG datastream markers.
+ * Most of the complexity arises from our desire to support input
+ * suspension: if not all of the data for a marker is available,
+ * we must exit back to the application.  On resumption, we reprocess
+ * the marker.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+
+  M_DHT   = 0xc4,
+
+  M_DAC   = 0xcc,
+
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+
+  M_JPG0  = 0xf0,
+  M_JPG8  = 0xf8,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+
+  M_TEM   = 0x01,
+
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_reader pub; /* public fields */
+
+  /* Application-overridable marker processing methods */
+  jpeg_marker_parser_method process_COM;
+  jpeg_marker_parser_method process_APPn[16];
+
+  /* Limit on marker data length to save for each marker type */
+  unsigned int length_limit_COM;
+  unsigned int length_limit_APPn[16];
+
+  /* Status of COM/APPn marker saving */
+  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
+  unsigned int bytes_read;		/* data bytes read so far in marker */
+  /* Note: cur_marker is not linked into marker_list until it's all read. */
+} my_marker_reader;
+
+typedef my_marker_reader * my_marker_ptr;
+
+
+/*
+ * Macros for fetching data from the data source module.
+ *
+ * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
+ * the current restart point; we update them only when we have reached a
+ * suitable place to restart if a suspension occurs.
+ */
+
+/* Declare and initialize local copies of input pointer/count */
+#define INPUT_VARS(cinfo)  \
+	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+
+/* Unload the local copies --- do this only at a restart boundary */
+#define INPUT_SYNC(cinfo)  \
+	( datasrc->next_input_byte = next_input_byte,  \
+	  datasrc->bytes_in_buffer = bytes_in_buffer )
+
+/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
+#define INPUT_RELOAD(cinfo)  \
+	( next_input_byte = datasrc->next_input_byte,  \
+	  bytes_in_buffer = datasrc->bytes_in_buffer )
+
+/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
+ * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
+ * but we must reload the local copies after a successful fill.
+ */
+#define MAKE_BYTE_AVAIL(cinfo,action)  \
+	if (bytes_in_buffer == 0) {  \
+	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
+	    { action; }  \
+	  INPUT_RELOAD(cinfo);  \
+	}
+
+/* Read a byte into variable V.
+ * If must suspend, take the specified action (typically "return FALSE").
+ */
+#define INPUT_BYTE(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = GETJOCTET(*next_input_byte++); )
+
+/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
+ * V should be declared unsigned int or perhaps INT32.
+ */
+#define INPUT_2BYTES(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+		  MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V += GETJOCTET(*next_input_byte++); )
+
+
+/*
+ * Routines to process JPEG markers.
+ *
+ * Entry condition: JPEG marker itself has been read and its code saved
+ *   in cinfo->unread_marker; input restart point is just after the marker.
+ *
+ * Exit: if return TRUE, have read and processed any parameters, and have
+ *   updated the restart point to point after the parameters.
+ *   If return FALSE, was forced to suspend before reaching end of
+ *   marker parameters; restart point has not been moved.  Same routine
+ *   will be called again after application supplies more input data.
+ *
+ * This approach to suspension assumes that all of a marker's parameters
+ * can fit into a single input bufferload.  This should hold for "normal"
+ * markers.  Some COM/APPn markers might have large parameter segments
+ * that might not fit.  If we are simply dropping such a marker, we use
+ * skip_input_data to get past it, and thereby put the problem on the
+ * source manager's shoulders.  If we are saving the marker's contents
+ * into memory, we use a slightly different convention: when forced to
+ * suspend, the marker processor updates the restart point to the end of
+ * what it's consumed (ie, the end of the buffer) before returning FALSE.
+ * On resumption, cinfo->unread_marker still contains the marker code,
+ * but the data source will point to the next chunk of marker data.
+ * The marker processor must retain internal state to deal with this.
+ *
+ * Note that we don't bother to avoid duplicate trace messages if a
+ * suspension occurs within marker parameters.  Other side effects
+ * require more care.
+ */
+
+
+LOCAL(boolean)
+get_soi (j_decompress_ptr cinfo)
+/* Process an SOI marker */
+{
+  int i;
+  
+  TRACEMS(cinfo, 1, JTRC_SOI);
+
+  if (cinfo->marker->saw_SOI)
+    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
+
+  /* Reset all parameters that are defined to be reset by SOI */
+
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+  cinfo->restart_interval = 0;
+
+  /* Set initial assumptions for colorspace etc */
+
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->color_transform = JCT_NONE;
+  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
+
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1; /* set default JFIF APP0 values */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+
+  cinfo->marker->saw_SOI = TRUE;
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sof (j_decompress_ptr cinfo, boolean is_baseline, boolean is_prog,
+	 boolean is_arith)
+/* Process a SOFn marker */
+{
+  INT32 length;
+  int c, ci, i;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  cinfo->is_baseline = is_baseline;
+  cinfo->progressive_mode = is_prog;
+  cinfo->arith_code = is_arith;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
+
+  length -= 8;
+
+  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
+	   (int) cinfo->image_width, (int) cinfo->image_height,
+	   cinfo->num_components);
+
+  if (cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
+
+  /* We don't support files in which the image height is initially specified */
+  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
+  /* might as well have a general sanity check. */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  if (length != (cinfo->num_components * 3))
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
+    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
+			((j_common_ptr) cinfo, JPOOL_IMAGE,
+			 cinfo->num_components * SIZEOF(jpeg_component_info));
+
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Check to see whether component id has already been seen   */
+    /* (in violation of the spec, but unfortunately seen in some */
+    /* files).  If so, create "fake" component id equal to the   */
+    /* max id seen so far + 1. */
+    for (i = 0, compptr = cinfo->comp_info; i < ci; i++, compptr++) {
+      if (c == compptr->component_id) {
+	compptr = cinfo->comp_info;
+	c = compptr->component_id;
+	compptr++;
+	for (i = 1; i < ci; i++, compptr++) {
+	  if (compptr->component_id > c) c = compptr->component_id;
+	}
+	c++;
+	break;
+      }
+    }
+    compptr->component_id = c;
+    compptr->component_index = ci;
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->h_samp_factor = (c >> 4) & 15;
+    compptr->v_samp_factor = (c     ) & 15;
+    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
+
+    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
+	     compptr->component_id, compptr->h_samp_factor,
+	     compptr->v_samp_factor, compptr->quant_tbl_no);
+  }
+
+  cinfo->marker->saw_SOF = TRUE;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sos (j_decompress_ptr cinfo)
+/* Process a SOS marker */
+{
+  INT32 length;
+  int c, ci, i, n;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXITS(cinfo, JERR_SOF_BEFORE, "SOS");
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
+
+  TRACEMS1(cinfo, 1, JTRC_SOS, n);
+
+  if (length != (n * 2 + 6) || n > MAX_COMPS_IN_SCAN ||
+      (n == 0 && !cinfo->progressive_mode))
+      /* pseudo SOS marker only allowed in progressive mode */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  cinfo->comps_in_scan = n;
+
+  /* Collect the component-spec parameters */
+
+  for (i = 0; i < n; i++) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+
+    /* Detect the case where component id's are not unique, and, if so, */
+    /* create a fake component id using the same logic as in get_sof.   */
+    /* Note:  This also ensures that all of the SOF components are      */
+    /* referenced in the single scan case, which prevents access to     */
+    /* uninitialized memory in later decoding stages. */
+    for (ci = 0; ci < i; ci++) {
+      if (c == cinfo->cur_comp_info[ci]->component_id) {
+	c = cinfo->cur_comp_info[0]->component_id;
+	for (ci = 1; ci < i; ci++) {
+	  compptr = cinfo->cur_comp_info[ci];
+	  if (compptr->component_id > c) c = compptr->component_id;
+	}
+	c++;
+	break;
+      }
+    }
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (c == compptr->component_id)
+	goto id_found;
+    }
+
+    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, c);
+
+  id_found:
+
+    cinfo->cur_comp_info[i] = compptr;
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->dc_tbl_no = (c >> 4) & 15;
+    compptr->ac_tbl_no = (c     ) & 15;
+
+    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, compptr->component_id,
+	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+  }
+
+  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ss = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Se = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ah = (c >> 4) & 15;
+  cinfo->Al = (c     ) & 15;
+
+  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
+	   cinfo->Ah, cinfo->Al);
+
+  /* Prepare to scan data & restart markers */
+  cinfo->marker->next_restart_num = 0;
+
+  /* Count another (non-pseudo) SOS marker */
+  if (n) cinfo->input_scan_number++;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+#ifdef D_ARITH_CODING_SUPPORTED
+
+LOCAL(boolean)
+get_dac (j_decompress_ptr cinfo)
+/* Process a DAC marker */
+{
+  INT32 length;
+  int index, val;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  while (length > 0) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+    INPUT_BYTE(cinfo, val, return FALSE);
+
+    length -= 2;
+
+    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
+
+    if (index < 0 || index >= (2*NUM_ARITH_TBLS))
+      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
+
+    if (index >= NUM_ARITH_TBLS) { /* define AC table */
+      cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
+    } else {			/* define DC table */
+      cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
+      cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
+      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
+	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
+    }
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+#else /* ! D_ARITH_CODING_SUPPORTED */
+
+#define get_dac(cinfo)  skip_variable(cinfo)
+
+#endif /* D_ARITH_CODING_SUPPORTED */
+
+
+LOCAL(boolean)
+get_dht (j_decompress_ptr cinfo)
+/* Process a DHT marker */
+{
+  INT32 length;
+  UINT8 bits[17];
+  UINT8 huffval[256];
+  int i, index, count;
+  JHUFF_TBL **htblptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  while (length > 16) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+
+    TRACEMS1(cinfo, 1, JTRC_DHT, index);
+      
+    bits[0] = 0;
+    count = 0;
+    for (i = 1; i <= 16; i++) {
+      INPUT_BYTE(cinfo, bits[i], return FALSE);
+      count += bits[i];
+    }
+
+    length -= 1 + 16;
+
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[1], bits[2], bits[3], bits[4],
+	     bits[5], bits[6], bits[7], bits[8]);
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[9], bits[10], bits[11], bits[12],
+	     bits[13], bits[14], bits[15], bits[16]);
+
+    /* Here we just do minimal validation of the counts to avoid walking
+     * off the end of our table space.  jdhuff.c will check more carefully.
+     */
+    if (count > 256 || ((INT32) count) > length)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+    for (i = 0; i < count; i++)
+      INPUT_BYTE(cinfo, huffval[i], return FALSE);
+
+    length -= count;
+
+    if (index & 0x10) {		/* AC table definition */
+      index -= 0x10;
+      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
+    } else {			/* DC table definition */
+      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
+    }
+
+    if (index < 0 || index >= NUM_HUFF_TBLS)
+      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+
+    if (*htblptr == NULL)
+      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+  
+    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+    if (count > 0)
+      MEMCOPY((*htblptr)->huffval, huffval, count * SIZEOF(UINT8));
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dqt (j_decompress_ptr cinfo)
+/* Process a DQT marker */
+{
+  INT32 length, count, i;
+  int n, prec;
+  unsigned int tmp;
+  JQUANT_TBL *quant_ptr;
+  const int *natural_order;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 0) {
+    length--;
+    INPUT_BYTE(cinfo, n, return FALSE);
+    prec = n >> 4;
+    n &= 0x0F;
+
+    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
+
+    if (n >= NUM_QUANT_TBLS)
+      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
+      
+    if (cinfo->quant_tbl_ptrs[n] == NULL)
+      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+    quant_ptr = cinfo->quant_tbl_ptrs[n];
+
+    if (prec) {
+      if (length < DCTSIZE2 * 2) {
+	/* Initialize full table for safety. */
+	for (i = 0; i < DCTSIZE2; i++) {
+	  quant_ptr->quantval[i] = 1;
+	}
+	count = length >> 1;
+      } else
+	count = DCTSIZE2;
+    } else {
+      if (length < DCTSIZE2) {
+	/* Initialize full table for safety. */
+	for (i = 0; i < DCTSIZE2; i++) {
+	  quant_ptr->quantval[i] = 1;
+	}
+	count = length;
+      } else
+	count = DCTSIZE2;
+    }
+
+    switch ((int) count) {
+    case (2*2): natural_order = jpeg_natural_order2; break;
+    case (3*3): natural_order = jpeg_natural_order3; break;
+    case (4*4): natural_order = jpeg_natural_order4; break;
+    case (5*5): natural_order = jpeg_natural_order5; break;
+    case (6*6): natural_order = jpeg_natural_order6; break;
+    case (7*7): natural_order = jpeg_natural_order7; break;
+    default:    natural_order = jpeg_natural_order;
+    }
+
+    for (i = 0; i < count; i++) {
+      if (prec)
+	INPUT_2BYTES(cinfo, tmp, return FALSE);
+      else
+	INPUT_BYTE(cinfo, tmp, return FALSE);
+      /* We convert the zigzag-order table to natural array order. */
+      quant_ptr->quantval[natural_order[i]] = (UINT16) tmp;
+    }
+
+    if (cinfo->err->trace_level >= 2) {
+      for (i = 0; i < DCTSIZE2; i += 8) {
+	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
+		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
+		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
+		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
+      }
+    }
+
+    length -= count;
+    if (prec) length -= count;
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dri (j_decompress_ptr cinfo)
+/* Process a DRI marker */
+{
+  INT32 length;
+  unsigned int tmp;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  
+  if (length != 4)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+
+  TRACEMS1(cinfo, 1, JTRC_DRI, tmp);
+
+  cinfo->restart_interval = tmp;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_lse (j_decompress_ptr cinfo)
+/* Process an LSE marker */
+{
+  INT32 length;
+  unsigned int tmp;
+  int cid;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXITS(cinfo, JERR_SOF_BEFORE, "LSE");
+
+  if (cinfo->num_components < 3) goto bad;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  if (length != 24)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0x0D)	/* ID inverse transform specification */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != MAXJSAMPLE) goto bad;		/* MAXTRANS */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 3) goto bad;			/* Nt=3 */
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[1].component_id) goto bad;
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[0].component_id) goto bad;
+  INPUT_BYTE(cinfo, cid, return FALSE);
+  if (cid != cinfo->comp_info[2].component_id) goto bad;
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0x80) goto bad;		/* F1: CENTER1=1, NORM1=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(1,1)=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(1,2)=0 */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;		/* F2: CENTER2=0, NORM2=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 1) goto bad;			/* A(2,1)=1 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;			/* A(2,2)=0 */
+  INPUT_BYTE(cinfo, tmp, return FALSE);
+  if (tmp != 0) goto bad;		/* F3: CENTER3=0, NORM3=0 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 1) goto bad;			/* A(3,1)=1 */
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+  if (tmp != 0) {				/* A(3,2)=0 */
+    bad:
+    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+  }
+
+  /* OK, valid transform that we can handle. */
+  cinfo->color_transform = JCT_SUBTRACT_GREEN;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Routines for processing APPn and COM markers.
+ * These are either saved in memory or discarded, per application request.
+ * APP0 and APP14 are specially checked to see if they are
+ * JFIF and Adobe markers, respectively.
+ */
+
+#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
+#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
+#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
+
+
+LOCAL(void)
+examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	      unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP0.
+ * Take appropriate action if it is a JFIF marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  INT32 totallen = (INT32) datalen + remaining;
+
+  if (datalen >= APP0_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x49 &&
+      GETJOCTET(data[3]) == 0x46 &&
+      GETJOCTET(data[4]) == 0) {
+    /* Found JFIF APP0 marker: save info */
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = GETJOCTET(data[5]);
+    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
+    cinfo->density_unit = GETJOCTET(data[7]);
+    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
+    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+    /* Check version.
+     * Major version must be 1 or 2, anything else signals an incompatible
+     * change.
+     * (We used to treat this as an error, but now it's a nonfatal warning,
+     * because some bozo at Hijaak couldn't read the spec.)
+     * Minor version should be 0..2, but process anyway if newer.
+     */
+    if (cinfo->JFIF_major_version != 1 && cinfo->JFIF_major_version != 2)
+      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
+	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+    /* Generate trace messages */
+    TRACEMS5(cinfo, 1, JTRC_JFIF,
+	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+    /* Validate thumbnail dimensions and issue appropriate messages */
+    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
+	       GETJOCTET(data[12]), GETJOCTET(data[13]));
+    totallen -= APP0_DATA_LEN;
+    if (totallen !=
+	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
+      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
+  } else if (datalen >= 6 &&
+      GETJOCTET(data[0]) == 0x4A &&
+      GETJOCTET(data[1]) == 0x46 &&
+      GETJOCTET(data[2]) == 0x58 &&
+      GETJOCTET(data[3]) == 0x58 &&
+      GETJOCTET(data[4]) == 0) {
+    /* Found JFIF "JFXX" extension APP0 marker */
+    /* The library doesn't actually do anything with these,
+     * but we try to produce a helpful trace message.
+     */
+    switch (GETJOCTET(data[5])) {
+    case 0x10:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
+      break;
+    case 0x11:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
+      break;
+    case 0x13:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
+      break;
+    default:
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
+	       GETJOCTET(data[5]), (int) totallen);
+    }
+  } else {
+    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
+  }
+}
+
+
+LOCAL(void)
+examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	       unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP14.
+ * Take appropriate action if it is an Adobe marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  unsigned int version, flags0, flags1, transform;
+
+  if (datalen >= APP14_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x41 &&
+      GETJOCTET(data[1]) == 0x64 &&
+      GETJOCTET(data[2]) == 0x6F &&
+      GETJOCTET(data[3]) == 0x62 &&
+      GETJOCTET(data[4]) == 0x65) {
+    /* Found Adobe APP14 marker */
+    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
+    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
+    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
+    transform = GETJOCTET(data[11]);
+    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
+    cinfo->saw_Adobe_marker = TRUE;
+    cinfo->Adobe_transform = (UINT8) transform;
+  } else {
+    /* Start of APP14 does not match "Adobe", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
+  }
+}
+
+
+METHODDEF(boolean)
+get_interesting_appn (j_decompress_ptr cinfo)
+/* Process an APP0 or APP14 marker without saving it */
+{
+  INT32 length;
+  JOCTET b[APPN_DATA_LEN];
+  unsigned int i, numtoread;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  /* get the interesting part of the marker data */
+  if (length >= APPN_DATA_LEN)
+    numtoread = APPN_DATA_LEN;
+  else if (length > 0)
+    numtoread = (unsigned int) length;
+  else
+    numtoread = 0;
+  for (i = 0; i < numtoread; i++)
+    INPUT_BYTE(cinfo, b[i], return FALSE);
+  length -= numtoread;
+
+  /* process it */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
+    break;
+  default:
+    /* can't get here unless jpeg_save_markers chooses wrong processor */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+METHODDEF(boolean)
+save_marker (j_decompress_ptr cinfo)
+/* Save an APPn or COM marker into the marker list */
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
+  unsigned int bytes_read, data_length;
+  JOCTET FAR * data;
+  INT32 length = 0;
+  INPUT_VARS(cinfo);
+
+  if (cur_marker == NULL) {
+    /* begin reading a marker */
+    INPUT_2BYTES(cinfo, length, return FALSE);
+    length -= 2;
+    if (length >= 0) {		/* watch out for bogus length word */
+      /* figure out how much we want to save */
+      unsigned int limit;
+      if (cinfo->unread_marker == (int) M_COM)
+	limit = marker->length_limit_COM;
+      else
+	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+      if ((unsigned int) length < limit)
+	limit = (unsigned int) length;
+      /* allocate and initialize the marker item */
+      cur_marker = (jpeg_saved_marker_ptr)
+	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(struct jpeg_marker_struct) + limit);
+      cur_marker->next = NULL;
+      cur_marker->marker = (UINT8) cinfo->unread_marker;
+      cur_marker->original_length = (unsigned int) length;
+      cur_marker->data_length = limit;
+      /* data area is just beyond the jpeg_marker_struct */
+      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
+      marker->cur_marker = cur_marker;
+      marker->bytes_read = 0;
+      bytes_read = 0;
+      data_length = limit;
+    } else {
+      /* deal with bogus length word */
+      bytes_read = data_length = 0;
+      data = NULL;
+    }
+  } else {
+    /* resume reading a marker */
+    bytes_read = marker->bytes_read;
+    data_length = cur_marker->data_length;
+    data = cur_marker->data + bytes_read;
+  }
+
+  while (bytes_read < data_length) {
+    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    marker->bytes_read = bytes_read;
+    /* If there's not at least one byte in buffer, suspend */
+    MAKE_BYTE_AVAIL(cinfo, return FALSE);
+    /* Copy bytes with reasonable rapidity */
+    while (bytes_read < data_length && bytes_in_buffer > 0) {
+      *data++ = *next_input_byte++;
+      bytes_in_buffer--;
+      bytes_read++;
+    }
+  }
+
+  /* Done reading what we want to read */
+  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+    /* Add new marker to end of list */
+    if (cinfo->marker_list == NULL) {
+      cinfo->marker_list = cur_marker;
+    } else {
+      jpeg_saved_marker_ptr prev = cinfo->marker_list;
+      while (prev->next != NULL)
+	prev = prev->next;
+      prev->next = cur_marker;
+    }
+    /* Reset pointer & calc remaining data length */
+    data = cur_marker->data;
+    length = cur_marker->original_length - data_length;
+  }
+  /* Reset to initial state for next marker */
+  marker->cur_marker = NULL;
+
+  /* Process the marker if interesting; else just make a generic trace msg */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, data, data_length, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, data, data_length, length);
+    break;
+  default:
+    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
+	     (int) (data_length + length));
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+METHODDEF(boolean)
+skip_variable (j_decompress_ptr cinfo)
+/* Skip over an unknown or uninteresting variable-length marker */
+{
+  INT32 length;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+  
+  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
+
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+
+  return TRUE;
+}
+
+
+/*
+ * Find the next JPEG marker, save it in cinfo->unread_marker.
+ * Returns FALSE if had to suspend before reaching a marker;
+ * in that case cinfo->unread_marker is unchanged.
+ *
+ * Note that the result might not be a valid marker code,
+ * but it will never be 0 or FF.
+ */
+
+LOCAL(boolean)
+next_marker (j_decompress_ptr cinfo)
+{
+  int c;
+  INPUT_VARS(cinfo);
+
+  for (;;) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Skip any non-FF bytes.
+     * This may look a bit inefficient, but it will not occur in a valid file.
+     * We sync after each discarded byte so that a suspending data source
+     * can discard the byte from its buffer.
+     */
+    while (c != 0xFF) {
+      cinfo->marker->discarded_bytes++;
+      INPUT_SYNC(cinfo);
+      INPUT_BYTE(cinfo, c, return FALSE);
+    }
+    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
+     * pad bytes, so don't count them in discarded_bytes.  We assume there
+     * will not be so many consecutive FF bytes as to overflow a suspending
+     * data source's input buffer.
+     */
+    do {
+      INPUT_BYTE(cinfo, c, return FALSE);
+    } while (c == 0xFF);
+    if (c != 0)
+      break;			/* found a valid marker, exit loop */
+    /* Reach here if we found a stuffed-zero data sequence (FF/00).
+     * Discard it and loop back to try again.
+     */
+    cinfo->marker->discarded_bytes += 2;
+    INPUT_SYNC(cinfo);
+  }
+
+  if (cinfo->marker->discarded_bytes != 0) {
+    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
+    cinfo->marker->discarded_bytes = 0;
+  }
+
+  cinfo->unread_marker = c;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+first_marker (j_decompress_ptr cinfo)
+/* Like next_marker, but used to obtain the initial SOI marker. */
+/* For this marker, we do not allow preceding garbage or fill; otherwise,
+ * we might well scan an entire input file before realizing it ain't JPEG.
+ * If an application wants to process non-JFIF files, it must seek to the
+ * SOI before calling the JPEG library.
+ */
+{
+  int c, c2;
+  INPUT_VARS(cinfo);
+
+  INPUT_BYTE(cinfo, c, return FALSE);
+  INPUT_BYTE(cinfo, c2, return FALSE);
+  if (c != 0xFF || c2 != (int) M_SOI)
+    ERREXIT2(cinfo, JERR_NO_SOI, c, c2);
+
+  cinfo->unread_marker = c2;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Read markers until SOS or EOI.
+ *
+ * Returns same codes as are defined for jpeg_consume_input:
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * Note: This function may return a pseudo SOS marker (with zero
+ * component number) for treat by input controller's consume_input.
+ * consume_input itself should filter out (skip) the pseudo marker
+ * after processing for the caller.
+ */
+
+METHODDEF(int)
+read_markers (j_decompress_ptr cinfo)
+{
+  /* Outer loop repeats once for each marker. */
+  for (;;) {
+    /* Collect the marker proper, unless we already did. */
+    /* NB: first_marker() enforces the requirement that SOI appear first. */
+    if (cinfo->unread_marker == 0) {
+      if (! cinfo->marker->saw_SOI) {
+	if (! first_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      } else {
+	if (! next_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      }
+    }
+    /* At this point cinfo->unread_marker contains the marker code and the
+     * input point is just past the marker proper, but before any parameters.
+     * A suspension will cause us to return with this state still true.
+     */
+    switch (cinfo->unread_marker) {
+    case M_SOI:
+      if (! get_soi(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF0:		/* Baseline */
+      if (! get_sof(cinfo, TRUE, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF1:		/* Extended sequential, Huffman */
+      if (! get_sof(cinfo, FALSE, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF2:		/* Progressive, Huffman */
+      if (! get_sof(cinfo, FALSE, TRUE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF9:		/* Extended sequential, arithmetic */
+      if (! get_sof(cinfo, FALSE, FALSE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF10:		/* Progressive, arithmetic */
+      if (! get_sof(cinfo, FALSE, TRUE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* Currently unsupported SOFn types */
+    case M_SOF3:		/* Lossless, Huffman */
+    case M_SOF5:		/* Differential sequential, Huffman */
+    case M_SOF6:		/* Differential progressive, Huffman */
+    case M_SOF7:		/* Differential lossless, Huffman */
+    case M_JPG:			/* Reserved for JPEG extensions */
+    case M_SOF11:		/* Lossless, arithmetic */
+    case M_SOF13:		/* Differential sequential, arithmetic */
+    case M_SOF14:		/* Differential progressive, arithmetic */
+    case M_SOF15:		/* Differential lossless, arithmetic */
+      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
+      break;
+
+    case M_SOS:
+      if (! get_sos(cinfo))
+	return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_SOS;
+
+    case M_EOI:
+      TRACEMS(cinfo, 1, JTRC_EOI);
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_EOI;
+
+    case M_DAC:
+      if (! get_dac(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DHT:
+      if (! get_dht(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DQT:
+      if (! get_dqt(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_DRI:
+      if (! get_dri(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_JPG8:
+      if (! get_lse(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_APP0:
+    case M_APP1:
+    case M_APP2:
+    case M_APP3:
+    case M_APP4:
+    case M_APP5:
+    case M_APP6:
+    case M_APP7:
+    case M_APP8:
+    case M_APP9:
+    case M_APP10:
+    case M_APP11:
+    case M_APP12:
+    case M_APP13:
+    case M_APP14:
+    case M_APP15:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
+		cinfo->unread_marker - (int) M_APP0]) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_COM:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:		/* these are all parameterless */
+    case M_RST1:
+    case M_RST2:
+    case M_RST3:
+    case M_RST4:
+    case M_RST5:
+    case M_RST6:
+    case M_RST7:
+    case M_TEM:
+      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
+      break;
+
+    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+      if (! skip_variable(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    default:			/* must be DHP, EXP, JPGn, or RESn */
+      /* For now, we treat the reserved markers as fatal errors since they are
+       * likely to be used to signal incompatible JPEG Part 3 extensions.
+       * Once the JPEG 3 version-number marker is well defined, this code
+       * ought to change!
+       */
+      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+    }
+    /* Successfully processed marker, so reset state variable */
+    cinfo->unread_marker = 0;
+  } /* end loop */
+}
+
+
+/*
+ * Read a restart marker, which is expected to appear next in the datastream;
+ * if the marker is not there, take appropriate recovery action.
+ * Returns FALSE if suspension is required.
+ *
+ * This is called by the entropy decoder after it has read an appropriate
+ * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
+ * has already read a marker from the data source.  Under normal conditions
+ * cinfo->unread_marker will be reset to 0 before returning; if not reset,
+ * it holds a marker which the decoder will be unable to read past.
+ */
+
+METHODDEF(boolean)
+read_restart_marker (j_decompress_ptr cinfo)
+{
+  /* Obtain a marker unless we already did. */
+  /* Note that next_marker will complain if it skips any data. */
+  if (cinfo->unread_marker == 0) {
+    if (! next_marker(cinfo))
+      return FALSE;
+  }
+
+  if (cinfo->unread_marker ==
+      ((int) M_RST0 + cinfo->marker->next_restart_num)) {
+    /* Normal case --- swallow the marker and let entropy decoder continue */
+    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
+    cinfo->unread_marker = 0;
+  } else {
+    /* Uh-oh, the restart markers have been messed up. */
+    /* Let the data source manager determine how to resync. */
+    if (! (*cinfo->src->resync_to_restart) (cinfo,
+					    cinfo->marker->next_restart_num))
+      return FALSE;
+  }
+
+  /* Update next-restart state */
+  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
+
+  return TRUE;
+}
+
+
+/*
+ * This is the default resync_to_restart method for data source managers
+ * to use if they don't have any better approach.  Some data source managers
+ * may be able to back up, or may have additional knowledge about the data
+ * which permits a more intelligent recovery strategy; such managers would
+ * presumably supply their own resync method.
+ *
+ * read_restart_marker calls resync_to_restart if it finds a marker other than
+ * the restart marker it was expecting.  (This code is *not* used unless
+ * a nonzero restart interval has been declared.)  cinfo->unread_marker is
+ * the marker code actually found (might be anything, except 0 or FF).
+ * The desired restart marker number (0..7) is passed as a parameter.
+ * This routine is supposed to apply whatever error recovery strategy seems
+ * appropriate in order to position the input stream to the next data segment.
+ * Note that cinfo->unread_marker is treated as a marker appearing before
+ * the current data-source input point; usually it should be reset to zero
+ * before returning.
+ * Returns FALSE if suspension is required.
+ *
+ * This implementation is substantially constrained by wanting to treat the
+ * input as a data stream; this means we can't back up.  Therefore, we have
+ * only the following actions to work with:
+ *   1. Simply discard the marker and let the entropy decoder resume at next
+ *      byte of file.
+ *   2. Read forward until we find another marker, discarding intervening
+ *      data.  (In theory we could look ahead within the current bufferload,
+ *      without having to discard data if we don't find the desired marker.
+ *      This idea is not implemented here, in part because it makes behavior
+ *      dependent on buffer size and chance buffer-boundary positions.)
+ *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
+ *      This will cause the entropy decoder to process an empty data segment,
+ *      inserting dummy zeroes, and then we will reprocess the marker.
+ *
+ * #2 is appropriate if we think the desired marker lies ahead, while #3 is
+ * appropriate if the found marker is a future restart marker (indicating
+ * that we have missed the desired restart marker, probably because it got
+ * corrupted).
+ * We apply #2 or #3 if the found marker is a restart marker no more than
+ * two counts behind or ahead of the expected one.  We also apply #2 if the
+ * found marker is not a legal JPEG marker code (it's certainly bogus data).
+ * If the found marker is a restart marker more than 2 counts away, we do #1
+ * (too much risk that the marker is erroneous; with luck we will be able to
+ * resync at some future point).
+ * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
+ * overrunning the end of a scan.  An implementation limited to single-scan
+ * files might find it better to apply #2 for markers other than EOI, since
+ * any other marker would have to be bogus data in that case.
+ */
+
+GLOBAL(boolean)
+jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
+{
+  int marker = cinfo->unread_marker;
+  int action = 1;
+  
+  /* Always put up a warning. */
+  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
+  
+  /* Outer loop handles repeated decision after scanning forward. */
+  for (;;) {
+    if (marker < (int) M_SOF0)
+      action = 2;		/* invalid marker */
+    else if (marker < (int) M_RST0 || marker > (int) M_RST7)
+      action = 3;		/* valid non-restart marker */
+    else {
+      if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
+	  marker == ((int) M_RST0 + ((desired+2) & 7)))
+	action = 3;		/* one of the next two expected restarts */
+      else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
+	       marker == ((int) M_RST0 + ((desired-2) & 7)))
+	action = 2;		/* a prior restart, so advance */
+      else
+	action = 1;		/* desired restart or too far away */
+    }
+    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
+    switch (action) {
+    case 1:
+      /* Discard marker and let entropy decoder resume processing. */
+      cinfo->unread_marker = 0;
+      return TRUE;
+    case 2:
+      /* Scan to the next marker, and repeat the decision loop. */
+      if (! next_marker(cinfo))
+	return FALSE;
+      marker = cinfo->unread_marker;
+      break;
+    case 3:
+      /* Return without advancing past this marker. */
+      /* Entropy decoder will be forced to process an empty segment. */
+      return TRUE;
+    }
+  } /* end loop */
+}
+
+
+/*
+ * Reset marker processing state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  cinfo->comp_info = NULL;		/* until allocated by get_sof */
+  cinfo->input_scan_number = 0;		/* no SOS seen yet */
+  cinfo->unread_marker = 0;		/* no pending marker */
+  marker->pub.saw_SOI = FALSE;		/* set internal state too */
+  marker->pub.saw_SOF = FALSE;
+  marker->pub.discarded_bytes = 0;
+  marker->cur_marker = NULL;
+}
+
+
+/*
+ * Initialize the marker reader module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker;
+  int i;
+
+  /* Create subobject in permanent pool */
+  marker = (my_marker_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_marker_reader));
+  cinfo->marker = &marker->pub;
+  /* Initialize public method pointers */
+  marker->pub.reset_marker_reader = reset_marker_reader;
+  marker->pub.read_markers = read_markers;
+  marker->pub.read_restart_marker = read_restart_marker;
+  /* Initialize COM/APPn processing.
+   * By default, we examine and then discard APP0 and APP14,
+   * but simply discard COM and all other APPn.
+   */
+  marker->process_COM = skip_variable;
+  marker->length_limit_COM = 0;
+  for (i = 0; i < 16; i++) {
+    marker->process_APPn[i] = skip_variable;
+    marker->length_limit_APPn[i] = 0;
+  }
+  marker->process_APPn[0] = get_interesting_appn;
+  marker->process_APPn[14] = get_interesting_appn;
+  /* Reset marker processing state */
+  reset_marker_reader(cinfo);
+}
+
+
+/*
+ * Control saving of COM and APPn markers into marker_list.
+ */
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+GLOBAL(void)
+jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
+		   unsigned int length_limit)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  long maxlength;
+  jpeg_marker_parser_method processor;
+
+  /* Length limit mustn't be larger than what we can allocate
+   * (should only be a concern in a 16-bit environment).
+   */
+  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
+  if (((long) length_limit) > maxlength)
+    length_limit = (unsigned int) maxlength;
+
+  /* Choose processor routine to use.
+   * APP0/APP14 have special requirements.
+   */
+  if (length_limit) {
+    processor = save_marker;
+    /* If saving APP0/APP14, save at least enough for our internal use. */
+    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
+      length_limit = APP0_DATA_LEN;
+    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
+      length_limit = APP14_DATA_LEN;
+  } else {
+    processor = skip_variable;
+    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
+    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
+      processor = get_interesting_appn;
+  }
+
+  if (marker_code == (int) M_COM) {
+    marker->process_COM = processor;
+    marker->length_limit_COM = length_limit;
+  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
+    marker->process_APPn[marker_code - (int) M_APP0] = processor;
+    marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
+  } else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+/*
+ * Install a special processing method for COM or APPn markers.
+ */
+
+GLOBAL(void)
+jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
+			   jpeg_marker_parser_method routine)
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+
+  if (marker_code == (int) M_COM)
+    marker->process_COM = routine;
+  else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
+    marker->process_APPn[marker_code - (int) M_APP0] = routine;
+  else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
diff --git a/cmake/externals/libjpeg/jdmaster.c b/cmake/externals/libjpeg/jdmaster.c
new file mode 100644
index 000000000..3070b7bb4
--- /dev/null
+++ b/cmake/externals/libjpeg/jdmaster.c
@@ -0,0 +1,532 @@
+/*
+ * jdmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2002-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG decompressor.
+ * These routines are concerned with selecting the modules to be executed
+ * and with determining the number of passes and the work to be done in each
+ * pass.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_decomp_master pub; /* public fields */
+
+  int pass_number;		/* # of passes completed */
+
+  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
+
+  /* Saved references to initialized quantizer modules,
+   * in case we need to switch modes.
+   */
+  struct jpeg_color_quantizer * quantizer_1pass;
+  struct jpeg_color_quantizer * quantizer_2pass;
+} my_decomp_master;
+
+typedef my_decomp_master * my_master_ptr;
+
+
+/*
+ * Determine whether merged upsample/color conversion should be used.
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
+ */
+
+LOCAL(boolean)
+use_merged_upsample (j_decompress_ptr cinfo)
+{
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  /* Merging is the equivalent of plain box-filter upsampling. */
+  /* The following condition is only needed if fancy shall select
+   * a different upsampling method.  In our current implementation
+   * fancy only affects the DCT scaling, thus we can use fancy
+   * upsampling and merged upsample simultaneously, in particular
+   * with scaled DCT sizes larger than the default DCTSIZE.
+   */
+#if 0
+  if (cinfo->do_fancy_upsampling)
+    return FALSE;
+#endif
+  if (cinfo->CCIR601_sampling)
+    return FALSE;
+  /* jdmerge.c only supports YCC=>RGB color conversion */
+  if ((cinfo->jpeg_color_space != JCS_YCbCr &&
+       cinfo->jpeg_color_space != JCS_BG_YCC) ||
+      cinfo->num_components != 3 ||
+      cinfo->out_color_space != JCS_RGB ||
+      cinfo->out_color_components != RGB_PIXELSIZE ||
+      cinfo->color_transform)
+    return FALSE;
+  /* and it only handles 2h1v or 2h2v sampling ratios */
+  if (cinfo->comp_info[0].h_samp_factor != 2 ||
+      cinfo->comp_info[1].h_samp_factor != 1 ||
+      cinfo->comp_info[2].h_samp_factor != 1 ||
+      cinfo->comp_info[0].v_samp_factor >  2 ||
+      cinfo->comp_info[1].v_samp_factor != 1 ||
+      cinfo->comp_info[2].v_samp_factor != 1)
+    return FALSE;
+  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
+  if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
+      cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+      cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
+      cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size)
+    return FALSE;
+  /* ??? also need to test for upsample-time rescaling, when & if supported */
+  return TRUE;			/* by golly, it'll work... */
+#else
+  return FALSE;
+#endif
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ * Also note that it may be called before the master module is initialized!
+ */
+
+GLOBAL(void)
+jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for full decompression.
+ */
+{
+  int ci, i;
+  jpeg_component_info *compptr;
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_READY)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Compute core output image dimensions and DCT scaling choices. */
+  jpeg_core_output_dimensions(cinfo);
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+  /* In selecting the actual DCT scaling for each component, we try to
+   * scale up the chroma components via IDCT scaling rather than upsampling.
+   * This saves time if the upsampler gets to use 1:1 scaling.
+   * Note this code adapts subsampling ratios which are powers of 2.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    int ssize = 1;
+    if (! cinfo->raw_data_out)
+      while (cinfo->min_DCT_h_scaled_size * ssize <=
+	     (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
+	     (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) ==
+	     0) {
+	ssize = ssize * 2;
+      }
+    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
+    ssize = 1;
+    if (! cinfo->raw_data_out)
+      while (cinfo->min_DCT_v_scaled_size * ssize <=
+	     (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
+	     (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) ==
+	     0) {
+	ssize = ssize * 2;
+      }
+    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
+
+    /* We don't support IDCT ratios larger than 2. */
+    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
+	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
+    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
+	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
+
+    /* Recompute downsampled dimensions of components;
+     * application needs to know these if using raw downsampled data.
+     */
+    /* Size in samples, after IDCT scaling */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
+  }
+
+#endif /* IDCT_SCALING_SUPPORTED */
+
+  /* Report number of components in selected colorspace. */
+  /* This should correspond to the actual code in the color conversion module. */
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    break;
+  case JCS_RGB:
+  case JCS_BG_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    break;
+  default:	/* YCCK <=> CMYK conversion or same colorspace as in file */
+    i = 0;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++)
+      if (compptr->component_needed)
+	i++;	/* count output color components */
+    cinfo->out_color_components = i;
+  }
+  cinfo->output_components = (cinfo->quantize_colors ? 1 :
+			      cinfo->out_color_components);
+
+  /* See if upsampler will want to emit more than one row at a time */
+  if (use_merged_upsample(cinfo))
+    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
+  else
+    cinfo->rec_outbuf_height = 1;
+}
+
+
+/*
+ * Several decompression processes need to range-limit values to the range
+ * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
+ * due to noise introduced by quantization, roundoff error, etc.  These
+ * processes are inner loops and need to be as fast as possible.  On most
+ * machines, particularly CPUs with pipelines or instruction prefetch,
+ * a (subscript-check-less) C table lookup
+ *		x = sample_range_limit[x];
+ * is faster than explicit tests
+ *		if (x < 0)  x = 0;
+ *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ * These processes all use a common table prepared by the routine below.
+ *
+ * For most steps we can mathematically guarantee that the initial value
+ * of x is within 2*(MAXJSAMPLE+1) of the legal range, so a table running
+ * from -2*(MAXJSAMPLE+1) to 3*MAXJSAMPLE+2 is sufficient.  But for the
+ * initial limiting step (just after the IDCT), a wildly out-of-range value
+ * is possible if the input data is corrupt.  To avoid any chance of indexing
+ * off the end of memory and getting a bad-pointer trap, we perform the
+ * post-IDCT limiting thus:
+ *		x = (sample_range_limit - SUBSET)[(x + CENTER) & MASK];
+ * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
+ * samples.  Under normal circumstances this is more than enough range and
+ * a correct output will be generated; with bogus input data the mask will
+ * cause wraparound, and we will safely generate a bogus-but-in-range output.
+ * For the post-IDCT step, we want to convert the data from signed to unsigned
+ * representation by adding CENTERJSAMPLE at the same time that we limit it.
+ * This is accomplished with SUBSET = CENTER - CENTERJSAMPLE.
+ *
+ * Note that the table is allocated in near data space on PCs; it's small
+ * enough and used often enough to justify this.
+ */
+
+LOCAL(void)
+prepare_range_limit_table (j_decompress_ptr cinfo)
+/* Allocate and fill in the sample_range_limit table */
+{
+  JSAMPLE * table;
+  int i;
+
+  table = (JSAMPLE *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
+    JPOOL_IMAGE, (RANGE_CENTER * 2 + MAXJSAMPLE + 1) * SIZEOF(JSAMPLE));
+  /* First segment of range limit table: limit[x] = 0 for x < 0 */
+  MEMZERO(table, RANGE_CENTER * SIZEOF(JSAMPLE));
+  table += RANGE_CENTER;	/* allow negative subscripts of table */
+  cinfo->sample_range_limit = table;
+  /* Main part of range limit table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    table[i] = (JSAMPLE) i;
+  /* End of range limit table: limit[x] = MAXJSAMPLE for x > MAXJSAMPLE */
+  for (; i <=  MAXJSAMPLE + RANGE_CENTER; i++)
+    table[i] = MAXJSAMPLE;
+}
+
+
+/*
+ * Master selection of decompression modules.
+ * This is done once at jpeg_start_decompress time.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ * We also initialize the decompressor input side to begin consuming data.
+ *
+ * Since jpeg_read_header has finished, we know what is in the SOF
+ * and (first) SOS markers.  We also have all the application parameter
+ * settings.
+ */
+
+LOCAL(void)
+master_selection (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+  boolean use_c_buffer;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Initialize dimensions and other stuff */
+  jpeg_calc_output_dimensions(cinfo);
+  prepare_range_limit_table(cinfo);
+
+  /* Sanity check on image dimensions */
+  if (cinfo->output_height <= 0 || cinfo->output_width <= 0 ||
+      cinfo->out_color_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Width of an output scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Initialize my private state */
+  master->pass_number = 0;
+  master->using_merged_upsample = use_merged_upsample(cinfo);
+
+  /* Color quantizer selection */
+  master->quantizer_1pass = NULL;
+  master->quantizer_2pass = NULL;
+  /* No mode changes if not using buffered-image mode. */
+  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
+    cinfo->enable_1pass_quant = FALSE;
+    cinfo->enable_external_quant = FALSE;
+    cinfo->enable_2pass_quant = FALSE;
+  }
+  if (cinfo->quantize_colors) {
+    if (cinfo->raw_data_out)
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    /* 2-pass quantizer only works in 3-component color space. */
+    if (cinfo->out_color_components != 3) {
+      cinfo->enable_1pass_quant = TRUE;
+      cinfo->enable_external_quant = FALSE;
+      cinfo->enable_2pass_quant = FALSE;
+      cinfo->colormap = NULL;
+    } else if (cinfo->colormap != NULL) {
+      cinfo->enable_external_quant = TRUE;
+    } else if (cinfo->two_pass_quantize) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else {
+      cinfo->enable_1pass_quant = TRUE;
+    }
+
+    if (cinfo->enable_1pass_quant) {
+#ifdef QUANT_1PASS_SUPPORTED
+      jinit_1pass_quantizer(cinfo);
+      master->quantizer_1pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+
+    /* We use the 2-pass code to map to external colormaps. */
+    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
+#ifdef QUANT_2PASS_SUPPORTED
+      jinit_2pass_quantizer(cinfo);
+      master->quantizer_2pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+    /* If both quantizers are initialized, the 2-pass one is left active;
+     * this is necessary for starting with quantization to an external map.
+     */
+  }
+
+  /* Post-processing: in particular, color conversion first */
+  if (! cinfo->raw_data_out) {
+    if (master->using_merged_upsample) {
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+      jinit_merged_upsampler(cinfo); /* does color conversion too */
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      jinit_color_deconverter(cinfo);
+      jinit_upsampler(cinfo);
+    }
+    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+  }
+  /* Inverse DCT */
+  jinit_inverse_dct(cinfo);
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_decoder(cinfo);
+  else {
+    jinit_huff_decoder(cinfo);
+  }
+
+  /* Initialize principal buffer controllers. */
+  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
+  jinit_d_coef_controller(cinfo, use_c_buffer);
+
+  if (! cinfo->raw_data_out)
+    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* If jpeg_start_decompress will read the whole file, initialize
+   * progress monitoring appropriately.  The input step is counted
+   * as one pass.
+   */
+  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
+      cinfo->inputctl->has_multiple_scans) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
+    /* Count the input pass as done */
+    master->pass_number++;
+  }
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each output pass.  We determine which
+ * modules will be active during this pass and give them appropriate
+ * start_pass calls.  We also set is_dummy_pass to indicate whether this
+ * is a "real" output pass or a dummy pass for color quantization.
+ * (In the latter case, jdapistd.c will crank the pass to completion.)
+ */
+
+METHODDEF(void)
+prepare_for_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (master->pub.is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Final pass of 2-pass quantization */
+    master->pub.is_dummy_pass = FALSE;
+    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
+    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
+    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  } else {
+    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
+      /* Select new quantization method */
+      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+	cinfo->cquantize = master->quantizer_2pass;
+	master->pub.is_dummy_pass = TRUE;
+      } else if (cinfo->enable_1pass_quant) {
+	cinfo->cquantize = master->quantizer_1pass;
+      } else {
+	ERREXIT(cinfo, JERR_MODE_CHANGE);
+      }
+    }
+    (*cinfo->idct->start_pass) (cinfo);
+    (*cinfo->coef->start_output_pass) (cinfo);
+    if (! cinfo->raw_data_out) {
+      if (! master->using_merged_upsample)
+	(*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->upsample->start_pass) (cinfo);
+      if (cinfo->quantize_colors)
+	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+      (*cinfo->post->start_pass) (cinfo,
+	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+  }
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->pass_number +
+				    (master->pub.is_dummy_pass ? 2 : 1);
+    /* In buffered-image mode, we assume one more output pass if EOI not
+     * yet reached, but no more passes if EOI has been reached.
+     */
+    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
+      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
+    }
+  }
+}
+
+
+/*
+ * Finish up at end of an output pass.
+ */
+
+METHODDEF(void)
+finish_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (cinfo->quantize_colors)
+    (*cinfo->cquantize->finish_pass) (cinfo);
+  master->pass_number++;
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+GLOBAL(void)
+jpeg_new_colormap (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_BUFIMAGE)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
+      cinfo->colormap != NULL) {
+    /* Select 2-pass quantizer for external colormap use */
+    cinfo->cquantize = master->quantizer_2pass;
+    /* Notify quantizer of colormap change */
+    (*cinfo->cquantize->new_color_map) (cinfo);
+    master->pub.is_dummy_pass = FALSE; /* just in case */
+  } else
+    ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize master decompression control and select active modules.
+ * This is performed at the start of jpeg_start_decompress.
+ */
+
+GLOBAL(void)
+jinit_master_decompress (j_decompress_ptr cinfo)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_decomp_master));
+  cinfo->master = &master->pub;
+  master->pub.prepare_for_output_pass = prepare_for_output_pass;
+  master->pub.finish_output_pass = finish_output_pass;
+
+  master->pub.is_dummy_pass = FALSE;
+
+  master_selection(cinfo);
+}
diff --git a/cmake/externals/libjpeg/jdmerge.c b/cmake/externals/libjpeg/jdmerge.c
new file mode 100644
index 000000000..0d16821be
--- /dev/null
+++ b/cmake/externals/libjpeg/jdmerge.c
@@ -0,0 +1,437 @@
+/*
+ * jdmerge.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2013-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ *
+ * This file combines functions from jdsample.c and jdcolor.c;
+ * read those files first to understand what's going on.
+ *
+ * When the chroma components are to be upsampled by simple replication
+ * (ie, box filtering), we can save some work in color conversion by
+ * calculating all the output pixels corresponding to a pair of chroma
+ * samples at one time.  In the conversion equations
+ *	R = Y           + K1 * Cr
+ *	G = Y + K2 * Cb + K3 * Cr
+ *	B = Y + K4 * Cb
+ * only the Y term varies among the group of pixels corresponding to a pair
+ * of chroma samples, so the rest of the terms can be calculated just once.
+ * At typical sampling ratios, this eliminates half or three-quarters
+ * of the multiplications needed for color conversion.
+ *
+ * This file currently provides implementations for the following cases:
+ *	YCC => RGB color conversion only (YCbCr or BG_YCC).
+ *	Sampling ratios of 2h1v or 2h2v.
+ *	No scaling needed at upsample time.
+ *	Corner-aligned (non-CCIR601) sampling alignment.
+ * Other special cases could be added, but in most applications these
+ * are the only common cases.  (For uncommon cases we fall back on
+ * the more general code in jdsample.c and jdcolor.c.)
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+#if RANGE_BITS < 2
+  /* Deliberate syntax err */
+  Sorry, this code requires 2 or more range extension bits.
+#endif
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+			   JSAMPARRAY output_buf));
+
+  /* Private state for YCC->RGB conversion */
+  int * Cr_r_tab;		/* => table for Cr to R conversion */
+  int * Cb_b_tab;		/* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  JSAMPROW spare_row;
+  boolean spare_full;		/* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;	/* samples per output row */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+
+/*
+ * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
+ * This is taken directly from jdcolor.c; see that file for more info.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Normal case, sYCC */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.402 * x */
+    upsample->Cr_r_tab[i] = (int) DESCALE(FIX(1.402) * x, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.772 * x */
+    upsample->Cb_b_tab[i] = (int) DESCALE(FIX(1.772) * x, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.714136286 * x */
+    upsample->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
+    /* Cb=>G value is scaled-up -0.344136286 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    upsample->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
+  }
+}
+
+
+LOCAL(void)
+build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Wide gamut case, bg-sYCC */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 2.804 * x */
+    upsample->Cr_r_tab[i] = (int) DESCALE(FIX(2.804) * x, SCALEBITS);
+    /* Cb=>B value is nearest int to 3.544 * x */
+    upsample->Cb_b_tab[i] = (int) DESCALE(FIX(3.544) * x, SCALEBITS);
+    /* Cr=>G value is scaled-up -1.428272572 * x */
+    upsample->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
+    /* Cb=>G value is scaled-up -0.688272572 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    upsample->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_merged_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the spare buffer empty */
+  upsample->spare_full = FALSE;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * The control routine just handles the row buffering considerations.
+ */
+
+METHODDEF(void)
+merged_2v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 2:1 vertical sampling case: may need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPROW work_ptrs[2];
+  JDIMENSION num_rows;		/* number of rows returned to caller */
+
+  if (upsample->spare_full) {
+    /* If we have a spare row saved from a previous cycle, just return it. */
+    jcopy_sample_rows(& upsample->spare_row, output_buf + *out_row_ctr,
+		      1, upsample->out_row_width);
+    num_rows = 1;
+    upsample->spare_full = FALSE;
+  } else {
+    /* Figure number of rows to return to caller. */
+    num_rows = 2;
+    /* Not more than the distance to the end of the image. */
+    if (num_rows > upsample->rows_to_go)
+      num_rows = upsample->rows_to_go;
+    /* And not more than what the client can accept: */
+    out_rows_avail -= *out_row_ctr;
+    if (num_rows > out_rows_avail)
+      num_rows = out_rows_avail;
+    /* Create output pointer array for upsampler. */
+    work_ptrs[0] = output_buf[*out_row_ctr];
+    if (num_rows > 1) {
+      work_ptrs[1] = output_buf[*out_row_ctr + 1];
+    } else {
+      work_ptrs[1] = upsample->spare_row;
+      upsample->spare_full = TRUE;
+    }
+    /* Now do the upsampling. */
+    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
+  }
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (! upsample->spare_full)
+    (*in_row_group_ctr)++;
+}
+
+
+METHODDEF(void)
+merged_1v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 1:1 vertical sampling case: much easier, never need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Just do the upsampling. */
+  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
+			 output_buf + *out_row_ctr);
+  /* Adjust counts */
+  (*out_row_ctr)++;
+  (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by the control routines to do
+ * the actual upsampling/conversion.  One row group is processed per call.
+ *
+ * Note: since we may be writing directly into application-supplied buffers,
+ * we have to be honest about the output width; we can't assume the buffer
+ * has been rounded up to an even width.
+ */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+METHODDEF(void)
+h2v1_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue  = Cbbtab[cb];
+    cred   = Crrtab[cr];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED]   = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE]  = range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED]   = range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE]  = range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    y  = GETJSAMPLE(*inptr0);
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
+    outptr[RGB_GREEN] = range_limit[y +
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS))];
+    outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue  = Cbbtab[cb];
+    cred   = Crrtab[cr];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED]   = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE]  = range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED]   = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE]  = range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED]   = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE]  = range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED]   = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE]  = range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue  = Cbbtab[cb];
+    cred   = Crrtab[cr];
+    y  = GETJSAMPLE(*inptr00);
+    outptr0[RGB_RED]   = range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE]  = range_limit[y + cblue];
+    y  = GETJSAMPLE(*inptr01);
+    outptr1[RGB_RED]   = range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE]  = range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Module initialization routine for merged upsampling/color conversion.
+ *
+ * NB: this is called under the conditions determined by use_merged_upsample()
+ * in jdmaster.c.  That routine MUST correspond to the actual capabilities
+ * of this module; no safety checks are made here.
+ */
+
+GLOBAL(void)
+jinit_merged_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+
+  upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_upsampler));
+  cinfo->upsample = &upsample->pub;
+  upsample->pub.start_pass = start_pass_merged_upsample;
+  upsample->pub.need_context_rows = FALSE;
+
+  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
+
+  if (cinfo->max_v_samp_factor == 2) {
+    upsample->pub.upsample = merged_2v_upsample;
+    upsample->upmethod = h2v2_merged_upsample;
+    /* Allocate a spare row buffer */
+    upsample->spare_row = (JSAMPROW) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (size_t) upsample->out_row_width * SIZEOF(JSAMPLE));
+  } else {
+    upsample->pub.upsample = merged_1v_upsample;
+    upsample->upmethod = h2v1_merged_upsample;
+    /* No spare row needed */
+    upsample->spare_row = NULL;
+  }
+
+  if (cinfo->jpeg_color_space == JCS_BG_YCC)
+    build_bg_ycc_rgb_table(cinfo);
+  else
+    build_ycc_rgb_table(cinfo);
+}
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jdpostct.c b/cmake/externals/libjpeg/jdpostct.c
new file mode 100644
index 000000000..571563d72
--- /dev/null
+++ b/cmake/externals/libjpeg/jdpostct.c
@@ -0,0 +1,290 @@
+/*
+ * jdpostct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the decompression postprocessing controller.
+ * This controller manages the upsampling, color conversion, and color
+ * quantization/reduction steps; specifically, it controls the buffering
+ * between upsample/color conversion and color quantization/reduction.
+ *
+ * If no color quantization/reduction is required, then this module has no
+ * work to do, and it just hands off to the upsample/color conversion code.
+ * An integrated upsample/convert/quantize process would replace this module
+ * entirely.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_post_controller pub; /* public fields */
+
+  /* Color quantization source buffer: this holds output data from
+   * the upsample/color conversion step to be passed to the quantizer.
+   * For two-pass color quantization, we need a full-image buffer;
+   * for one-pass operation, a strip buffer is sufficient.
+   */
+  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
+  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
+  JDIMENSION strip_height;	/* buffer size in rows */
+  /* for two-pass mode only: */
+  JDIMENSION starting_row;	/* row # of first row in current strip */
+  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+} my_post_controller;
+
+typedef my_post_controller * my_post_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) post_process_1pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) post_process_prepass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+METHODDEF(void) post_process_2pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->quantize_colors) {
+      /* Single-pass processing with color quantization. */
+      post->pub.post_process_data = post_process_1pass;
+      /* We could be doing buffered-image output before starting a 2-pass
+       * color quantization; in that case, jinit_d_post_controller did not
+       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
+       */
+      if (post->buffer == NULL) {
+	post->buffer = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, post->whole_image,
+	   (JDIMENSION) 0, post->strip_height, TRUE);
+      }
+    } else {
+      /* For single-pass processing without color quantization,
+       * I have no work to do; just call the upsampler directly.
+       */
+      post->pub.post_process_data = cinfo->upsample->upsample;
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    /* First pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_prepass;
+    break;
+  case JBUF_CRANK_DEST:
+    /* Second pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_2pass;
+    break;
+#endif /* QUANT_2PASS_SUPPORTED */
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+  post->starting_row = post->next_row = 0;
+}
+
+
+/*
+ * Process some data in the one-pass (strip buffer) case.
+ * This is used for color precision reduction as well as one-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_1pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Fill the buffer, but not more than what we can dump out in one go. */
+  /* Note we rely on the upsampler to detect bottom of image. */
+  max_rows = out_rows_avail - *out_row_ctr;
+  if (max_rows > post->strip_height)
+    max_rows = post->strip_height;
+  num_rows = 0;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &num_rows, max_rows);
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
+  *out_row_ctr += num_rows;
+}
+
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+/*
+ * Process some data in the first pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_prepass (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		      JDIMENSION in_row_groups_avail,
+		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		      JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION old_next_row, num_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, TRUE);
+  }
+
+  /* Upsample some data (up to a strip height's worth). */
+  old_next_row = post->next_row;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &post->next_row, post->strip_height);
+
+  /* Allow quantizer to scan new data.  No data is emitted, */
+  /* but we advance out_row_ctr so outer loop can tell when we're done. */
+  if (post->next_row > old_next_row) {
+    num_rows = post->next_row - old_next_row;
+    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
+					 (JSAMPARRAY) NULL, (int) num_rows);
+    *out_row_ctr += num_rows;
+  }
+
+  /* Advance if we filled the strip. */
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+
+/*
+ * Process some data in the second pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_2pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, FALSE);
+  }
+
+  /* Determine number of rows to emit. */
+  num_rows = post->strip_height - post->next_row; /* available in strip */
+  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+  /* We have to check bottom of image here, can't depend on upsampler. */
+  max_rows = cinfo->output_height - post->starting_row;
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer + post->next_row, output_buf + *out_row_ctr,
+		(int) num_rows);
+  *out_row_ctr += num_rows;
+
+  /* Advance if we filled the strip. */
+  post->next_row += num_rows;
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize postprocessing controller.
+ */
+
+GLOBAL(void)
+jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_post_ptr post;
+
+  post = (my_post_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_post_controller));
+  cinfo->post = (struct jpeg_d_post_controller *) post;
+  post->pub.start_pass = start_pass_dpost;
+  post->whole_image = NULL;	/* flag for no virtual arrays */
+  post->buffer = NULL;		/* flag for no strip buffer */
+
+  /* Create the quantization buffer, if needed */
+  if (cinfo->quantize_colors) {
+    /* The buffer strip height is max_v_samp_factor, which is typically
+     * an efficient number of rows for upsampling to return.
+     * (In the presence of output rescaling, we might want to be smarter?)
+     */
+    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
+    if (need_full_buffer) {
+      /* Two-pass color quantization: need full-image storage. */
+      /* We round up the number of rows to a multiple of the strip height. */
+#ifdef QUANT_2PASS_SUPPORTED
+      post->whole_image = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 (JDIMENSION) jround_up((long) cinfo->output_height,
+				(long) post->strip_height),
+	 post->strip_height);
+#else
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif /* QUANT_2PASS_SUPPORTED */
+    } else {
+      /* One-pass color quantization: just make a strip buffer. */
+      post->buffer = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 cinfo->output_width * cinfo->out_color_components,
+	 post->strip_height);
+    }
+  }
+}
diff --git a/cmake/externals/libjpeg/jdsample.c b/cmake/externals/libjpeg/jdsample.c
new file mode 100644
index 000000000..15afeafe3
--- /dev/null
+++ b/cmake/externals/libjpeg/jdsample.c
@@ -0,0 +1,341 @@
+/*
+ * jdsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2002-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains upsampling routines.
+ *
+ * Upsampling input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
+ * sample rows of each component.  Upsampling will normally produce
+ * max_v_samp_factor pixel rows from each row group (but this could vary
+ * if the upsampler is applying a scale factor of its own).
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to upsample a single component */
+typedef JMETHOD(void, upsample1_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr));
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Color conversion buffer.  When using separate upsampling and color
+   * conversion steps, this buffer holds one upsampled row group until it
+   * has been color converted and output.
+   * Note: we do not allocate any storage for component(s) which are full-size,
+   * ie do not need rescaling.  The corresponding entry of color_buf[] is
+   * simply set to point to the input data array, thereby avoiding copying.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  /* Per-component upsampling method pointers */
+  upsample1_ptr methods[MAX_COMPONENTS];
+
+  int next_row_out;		/* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+
+  /* Height of an input row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_expand need not
+   * recompute them each time.  They are unused for other upsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+
+  /* Mark the conversion buffer empty */
+  upsample->next_row_out = cinfo->max_v_samp_factor;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * In this version we upsample each component independently.
+ * We upsample one row group into the conversion buffer, then apply
+ * color conversion a row at a time.
+ */
+
+METHODDEF(void)
+sep_upsample (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	      JDIMENSION in_row_groups_avail,
+	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	      JDIMENSION out_rows_avail)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JDIMENSION num_rows;
+
+  /* Fill the conversion buffer, if it's empty */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      /* Don't bother to upsample an uninteresting component. */
+      if (! compptr->component_needed)
+	continue;
+      /* Invoke per-component upsample method.  Notice we pass a POINTER
+       * to color_buf[ci], so that fullsize_upsample can change it.
+       */
+      (*upsample->methods[ci]) (cinfo, compptr,
+	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+	upsample->color_buf + ci);
+    }
+    upsample->next_row_out = 0;
+  }
+
+  /* Color-convert and emit rows */
+
+  /* How many we have in the buffer: */
+  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
+  /* Not more than the distance to the end of the image.  Need this test
+   * in case the image height is not a multiple of max_v_samp_factor:
+   */
+  if (num_rows > upsample->rows_to_go) 
+    num_rows = upsample->rows_to_go;
+  /* And not more than what the client can accept: */
+  out_rows_avail -= *out_row_ctr;
+  if (num_rows > out_rows_avail)
+    num_rows = out_rows_avail;
+
+  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
+				     (JDIMENSION) upsample->next_row_out,
+				     output_buf + *out_row_ctr,
+				     (int) num_rows);
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  upsample->next_row_out += num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
+    (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by sep_upsample to upsample pixel values
+ * of a single component.  One row group is processed per call.
+ */
+
+
+/*
+ * For full-size components, we just make color_buf[ci] point at the
+ * input buffer, and thus avoid copying any data.  Note that this is
+ * safe only because sep_upsample doesn't declare the input row group
+ * "consumed" until we are done color converting and emitting it.
+ */
+
+METHODDEF(void)
+fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
+{
+  *output_data_ptr = input_data;
+}
+
+
+/*
+ * This version handles any integral sampling ratios.
+ * This is not used for typical JPEG files, so it need not be fast.
+ * Nor, for that matter, is it particularly accurate: the algorithm is
+ * simple replication of the input pixel onto the corresponding output
+ * pixels.  The hi-falutin sampling literature refers to this as a
+ * "box filter".  A box filter tends to introduce visible artifacts,
+ * so if you are actually going to use 3:1 or 4:1 sampling ratios
+ * you would be well advised to improve this code.
+ */
+
+METHODDEF(void)
+int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	      JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPARRAY output_data, output_end;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  register int h;
+  JSAMPROW outend;
+  int h_expand, v_expand;
+
+  h_expand = upsample->h_expand[compptr->component_index];
+  v_expand = upsample->v_expand[compptr->component_index];
+
+  output_data = *output_data_ptr;
+  output_end = output_data + cinfo->max_v_samp_factor;
+  for (; output_data < output_end; output_data += v_expand) {
+    /* Generate one output row with proper horizontal expansion */
+    inptr = *input_data++;
+    outptr = *output_data;
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      for (h = h_expand; h > 0; h--) {
+	*outptr++ = invalue;
+      }
+    }
+    /* Generate any additional output rows by duplicating the first one */
+    if (v_expand > 1) {
+      jcopy_sample_rows(output_data, output_data + 1,
+			v_expand - 1, cinfo->output_width);
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  JSAMPROW outend;
+  int outrow;
+
+  for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) {
+    inptr = input_data[outrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
+{
+  JSAMPARRAY output_data, output_end;
+  register JSAMPROW inptr, outptr;
+  register JSAMPLE invalue;
+  JSAMPROW outend;
+
+  output_data = *output_data_ptr;
+  output_end = output_data + cinfo->max_v_samp_factor;
+  for (; output_data < output_end; output_data += 2) {
+    inptr = *input_data++;
+    outptr = *output_data;
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+    jcopy_sample_rows(output_data, output_data + 1,
+		      1, cinfo->output_width);
+  }
+}
+
+
+/*
+ * Module initialization routine for upsampling.
+ */
+
+GLOBAL(void)
+jinit_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+  upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_upsampler));
+  cinfo->upsample = &upsample->pub;
+  upsample->pub.start_pass = start_pass_upsample;
+  upsample->pub.upsample = sep_upsample;
+  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
+
+  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, select per-component methods,
+   * and create storage as needed.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to upsample an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Compute size of an "input group" after IDCT scaling.  This many samples
+     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
+		 cinfo->min_DCT_h_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
+		 cinfo->min_DCT_v_scaled_size;
+    h_out_group = cinfo->max_h_samp_factor;
+    v_out_group = cinfo->max_v_samp_factor;
+    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
+    if (h_in_group == h_out_group && v_in_group == v_out_group) {
+      /* Fullsize components can be processed without any work. */
+      upsample->methods[ci] = fullsize_upsample;
+      continue;		/* don't need to allocate buffer */
+    }
+    if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) {
+      /* Special case for 2h1v upsampling */
+      upsample->methods[ci] = h2v1_upsample;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group * 2 == v_out_group) {
+      /* Special case for 2h2v upsampling */
+      upsample->methods[ci] = h2v2_upsample;
+    } else if ((h_out_group % h_in_group) == 0 &&
+	       (v_out_group % v_in_group) == 0) {
+      /* Generic integral-factors upsampling method */
+      upsample->methods[ci] = int_upsample;
+      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
+      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+    upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) jround_up((long) cinfo->output_width,
+			      (long) cinfo->max_h_samp_factor),
+       (JDIMENSION) cinfo->max_v_samp_factor);
+  }
+}
diff --git a/cmake/externals/libjpeg/jdtrans.c b/cmake/externals/libjpeg/jdtrans.c
new file mode 100644
index 000000000..22dd47fb5
--- /dev/null
+++ b/cmake/externals/libjpeg/jdtrans.c
@@ -0,0 +1,140 @@
+/*
+ * jdtrans.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * Modified 2000-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding decompression,
+ * that is, reading raw DCT coefficient arrays from an input JPEG file.
+ * The routines in jdapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Read the coefficient arrays from a JPEG file.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * The entire image is read into a set of virtual coefficient-block arrays,
+ * one per component.  The return value is a pointer to the array of
+ * virtual-array descriptors.  These can be manipulated directly via the
+ * JPEG memory manager, or handed off to jpeg_write_coefficients().
+ * To release the memory occupied by the virtual arrays, call
+ * jpeg_finish_decompress() when done with the data.
+ *
+ * An alternative usage is to simply obtain access to the coefficient arrays
+ * during a buffered-image-mode decompression operation.  This is allowed
+ * after any jpeg_finish_output() call.  The arrays can be accessed until
+ * jpeg_finish_decompress() is called.  (Note that any call to the library
+ * may reposition the arrays, so don't rely on access_virt_barray() results
+ * to stay valid across library calls.)
+ *
+ * Returns NULL if suspended.  This case need be checked only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jpeg_read_coefficients (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    /* Absorb whole file into the coef buffer */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input) (cinfo);
+      if (retcode == JPEG_SUSPENDED)
+	return NULL;
+      if (retcode == JPEG_REACHED_EOI)
+	break;
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	}
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return cinfo->coef->coef_arrays;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return NULL;			/* keep compiler happy */
+}
+
+
+/*
+ * Master selection of decompression modules for transcoding.
+ * This substitutes for jdmaster.c's initialization of the full decompressor.
+ */
+
+LOCAL(void)
+transdecode_master_selection (j_decompress_ptr cinfo)
+{
+  /* This is effectively a buffered-image operation. */
+  cinfo->buffered_image = TRUE;
+
+  /* Compute output image dimensions and related values. */
+  jpeg_core_output_dimensions(cinfo);
+
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code)
+    jinit_arith_decoder(cinfo);
+  else {
+    jinit_huff_decoder(cinfo);
+  }
+
+  /* Always get a full-image coefficient buffer. */
+  jinit_d_coef_controller(cinfo, TRUE);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+  /* Initialize progress monitoring. */
+  if (cinfo->progress != NULL) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else if (cinfo->inputctl->has_multiple_scans) {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    } else {
+      nscans = 1;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = 1;
+  }
+}
diff --git a/cmake/externals/libjpeg/jerror.c b/cmake/externals/libjpeg/jerror.c
new file mode 100644
index 000000000..7163af699
--- /dev/null
+++ b/cmake/externals/libjpeg/jerror.c
@@ -0,0 +1,253 @@
+/*
+ * jerror.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2012-2015 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains simple error-reporting and trace-message routines.
+ * These are suitable for Unix-like systems and others where writing to
+ * stderr is the right thing to do.  Many applications will want to replace
+ * some or all of these routines.
+ *
+ * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
+ * you get a Windows-specific hack to display error messages in a dialog box.
+ * It ain't much, but it beats dropping error messages into the bit bucket,
+ * which is what happens to output to stderr under most Windows C compilers.
+ *
+ * These routines are used by both the compression and decompression code.
+ */
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+#include <windows.h>
+#endif
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jversion.h"
+#include "jerror.h"
+
+#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+
+
+/*
+ * Create the message string table.
+ * We do this from the master message list in jerror.h by re-reading
+ * jerror.h with a suitable definition for macro JMESSAGE.
+ * The message table is made an external symbol just in case any applications
+ * want to refer to it directly.
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_message_table	jMsgTable
+#endif
+
+#define JMESSAGE(code,string)	string ,
+
+const char * const jpeg_std_message_table[] = {
+#include "jerror.h"
+  NULL
+};
+
+
+/*
+ * Error exit handler: must not return to caller.
+ *
+ * Applications may override this if they want to get control back after
+ * an error.  Typically one would longjmp somewhere instead of exiting.
+ * The setjmp buffer can be made a private field within an expanded error
+ * handler object.  Note that the info needed to generate an error message
+ * is stored in the error object, so you can generate the message now or
+ * later, at your convenience.
+ * You should make sure that the JPEG object is cleaned up (with jpeg_abort
+ * or jpeg_destroy) at some point.
+ */
+
+METHODDEF(noreturn_t)
+error_exit (j_common_ptr cinfo)
+{
+  /* Always display the message */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Let the memory manager delete any temp files before we die */
+  jpeg_destroy(cinfo);
+
+  exit(EXIT_FAILURE);
+}
+
+
+/*
+ * Actual output of an error or trace message.
+ * Applications may override this method to send JPEG messages somewhere
+ * other than stderr.
+ *
+ * On Windows, printing to stderr is generally completely useless,
+ * so we provide optional code to produce an error-dialog popup.
+ * Most Windows applications will still prefer to override this routine,
+ * but if they don't, it'll do something at least marginally useful.
+ *
+ * NOTE: to use the library in an environment that doesn't support the
+ * C stdio library, you may have to delete the call to fprintf() entirely,
+ * not just not use this routine.
+ */
+
+METHODDEF(void)
+output_message (j_common_ptr cinfo)
+{
+  char buffer[JMSG_LENGTH_MAX];
+
+  /* Create the message */
+  (*cinfo->err->format_message) (cinfo, buffer);
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+  /* Display it in a message dialog box */
+  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
+	     MB_OK | MB_ICONERROR);
+#else
+  /* Send it to stderr, adding a newline */
+  fprintf(stderr, "%s\n", buffer);
+#endif
+}
+
+
+/*
+ * Decide whether to emit a trace or warning message.
+ * msg_level is one of:
+ *   -1: recoverable corrupt-data warning, may want to abort.
+ *    0: important advisory messages (always display to user).
+ *    1: first level of tracing detail.
+ *    2,3,...: successively more detailed tracing messages.
+ * An application might override this method if it wanted to abort on warnings
+ * or change the policy about which messages to display.
+ */
+
+METHODDEF(void)
+emit_message (j_common_ptr cinfo, int msg_level)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+
+  if (msg_level < 0) {
+    /* It's a warning message.  Since corrupt files may generate many warnings,
+     * the policy implemented here is to show only the first warning,
+     * unless trace_level >= 3.
+     */
+    if (err->num_warnings == 0 || err->trace_level >= 3)
+      (*err->output_message) (cinfo);
+    /* Always count warnings in num_warnings. */
+    err->num_warnings++;
+  } else {
+    /* It's a trace message.  Show it if trace_level >= msg_level. */
+    if (err->trace_level >= msg_level)
+      (*err->output_message) (cinfo);
+  }
+}
+
+
+/*
+ * Format a message string for the most recent JPEG error or message.
+ * The message is stored into buffer, which should be at least JMSG_LENGTH_MAX
+ * characters.  Note that no '\n' character is added to the string.
+ * Few applications should need to override this method.
+ */
+
+METHODDEF(void)
+format_message (j_common_ptr cinfo, char * buffer)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+  int msg_code = err->msg_code;
+  const char * msgtext = NULL;
+  const char * msgptr;
+  char ch;
+  boolean isstring;
+
+  /* Look up message string in proper table */
+  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
+    msgtext = err->jpeg_message_table[msg_code];
+  } else if (err->addon_message_table != NULL &&
+	     msg_code >= err->first_addon_message &&
+	     msg_code <= err->last_addon_message) {
+    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
+  }
+
+  /* Defend against bogus message number */
+  if (msgtext == NULL) {
+    err->msg_parm.i[0] = msg_code;
+    msgtext = err->jpeg_message_table[0];
+  }
+
+  /* Check for string parameter, as indicated by %s in the message text */
+  isstring = FALSE;
+  msgptr = msgtext;
+  while ((ch = *msgptr++) != '\0') {
+    if (ch == '%') {
+      if (*msgptr == 's') isstring = TRUE;
+      break;
+    }
+  }
+
+  /* Format the message into the passed buffer */
+  if (isstring)
+    sprintf(buffer, msgtext, err->msg_parm.s);
+  else
+    sprintf(buffer, msgtext,
+	    err->msg_parm.i[0], err->msg_parm.i[1],
+	    err->msg_parm.i[2], err->msg_parm.i[3],
+	    err->msg_parm.i[4], err->msg_parm.i[5],
+	    err->msg_parm.i[6], err->msg_parm.i[7]);
+}
+
+
+/*
+ * Reset error state variables at start of a new image.
+ * This is called during compression startup to reset trace/error
+ * processing to default state, without losing any application-specific
+ * method pointers.  An application might possibly want to override
+ * this method if it has additional error processing state.
+ */
+
+METHODDEF(void)
+reset_error_mgr (j_common_ptr cinfo)
+{
+  cinfo->err->num_warnings = 0;
+  /* trace_level is not reset since it is an application-supplied parameter */
+  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
+}
+
+
+/*
+ * Fill in the standard error-handling methods in a jpeg_error_mgr object.
+ * Typical call is:
+ *	struct jpeg_compress_struct cinfo;
+ *	struct jpeg_error_mgr err;
+ *
+ *	cinfo.err = jpeg_std_error(&err);
+ * after which the application may override some of the methods.
+ */
+
+GLOBAL(struct jpeg_error_mgr *)
+jpeg_std_error (struct jpeg_error_mgr * err)
+{
+  err->error_exit = error_exit;
+  err->emit_message = emit_message;
+  err->output_message = output_message;
+  err->format_message = format_message;
+  err->reset_error_mgr = reset_error_mgr;
+
+  err->trace_level = 0;		/* default = no tracing */
+  err->num_warnings = 0;	/* no warnings emitted yet */
+  err->msg_code = 0;		/* may be useful as a flag for "no error" */
+
+  /* Initialize message table pointers */
+  err->jpeg_message_table = jpeg_std_message_table;
+  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
+
+  err->addon_message_table = NULL;
+  err->first_addon_message = 0;	/* for safety */
+  err->last_addon_message = 0;
+
+  return err;
+}
diff --git a/cmake/externals/libjpeg/jerror.h b/cmake/externals/libjpeg/jerror.h
new file mode 100644
index 000000000..db608b9c0
--- /dev/null
+++ b/cmake/externals/libjpeg/jerror.h
@@ -0,0 +1,304 @@
+/*
+ * jerror.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 1997-2018 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the error and message codes for the JPEG library.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ * A set of error-reporting macros are defined too.  Some applications using
+ * the JPEG library may wish to include this file to get the error codes
+ * and/or the macros.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef JERROR_H
+/* First time through, define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
+#define JMESSAGE(code,string)
+#endif /* JERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code,string)	code ,
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
+
+/* For maintenance convenience, list is alphabetical by message code name */
+JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
+JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
+JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
+JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
+JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported")
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
+JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
+JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
+JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
+JMESSAGE(JERR_BAD_LIB_VERSION,
+	 "Wrong JPEG library version: library is %d, caller expects %d")
+JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
+JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
+JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
+JMESSAGE(JERR_BAD_PROGRESSION,
+	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+JMESSAGE(JERR_BAD_PROG_SCRIPT,
+	 "Invalid progressive parameters at scan script entry %d")
+JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
+JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
+JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
+JMESSAGE(JERR_BAD_STRUCT_SIZE,
+	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
+JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
+JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
+JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
+JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
+JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
+JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
+JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
+JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
+JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
+JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
+JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
+JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
+JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
+JMESSAGE(JERR_FILE_READ, "Input file read error")
+JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
+JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
+JMESSAGE(JERR_HUFF_CLEN_OUTOFBOUNDS, "Huffman code size table out of bounds")
+JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
+JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
+JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
+JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
+JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
+	 "Cannot transcode due to multiple use of quantization table %d")
+JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
+JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
+JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
+JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
+JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
+JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
+JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
+JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
+JMESSAGE(JERR_QUANT_COMPONENTS,
+	 "Cannot quantize more than %d color components")
+JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
+JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
+JMESSAGE(JERR_SOF_BEFORE, "Invalid JPEG file structure: %s before SOF")
+JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
+JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
+JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
+JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
+JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
+JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
+JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
+JMESSAGE(JERR_TFILE_WRITE,
+	 "Write failed on temporary file --- out of disk space?")
+JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
+JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
+JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
+JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
+JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
+JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
+JMESSAGE(JMSG_VERSION, JVERSION)
+JMESSAGE(JTRC_16BIT_TABLES,
+	 "Caution: quantization tables are too coarse for baseline JPEG")
+JMESSAGE(JTRC_ADOBE,
+	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
+JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
+JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
+JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
+JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
+JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
+JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
+JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
+JMESSAGE(JTRC_EOI, "End Of Image")
+JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
+JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
+JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
+	 "Warning: thumbnail image size does not match data length %u")
+JMESSAGE(JTRC_JFIF_EXTENSION,
+	 "JFIF extension marker: type 0x%02x, length %u")
+JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
+JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
+JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
+JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
+JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
+JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
+JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
+JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
+JMESSAGE(JTRC_RST, "RST%d")
+JMESSAGE(JTRC_SMOOTH_NOTIMPL,
+	 "Smoothing not supported with nonstandard sampling ratios")
+JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
+JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
+JMESSAGE(JTRC_SOI, "Start of Image")
+JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
+JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
+JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
+JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
+JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
+JMESSAGE(JTRC_THUMB_JPEG,
+	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_PALETTE,
+	 "JFIF extension marker: palette thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_RGB,
+	 "JFIF extension marker: RGB thumbnail image, length %u")
+JMESSAGE(JTRC_UNKNOWN_IDS,
+	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
+JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
+JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+JMESSAGE(JWRN_BOGUS_PROGRESSION,
+	 "Inconsistent progression sequence for component %d coefficient %d")
+JMESSAGE(JWRN_EXTRANEOUS_DATA,
+	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
+JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
+JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
+JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
+JMESSAGE(JWRN_MUST_RESYNC,
+	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
+JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTMSGCODE
+} J_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
+
+
+#ifndef JERROR_H
+#define JERROR_H
+
+/* Macros to simplify using the error and trace message stuff */
+/* The first parameter is either type of cinfo pointer */
+
+/* Fatal errors (print message and exit) */
+#define ERREXIT(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT3(cinfo,code,p1,p2,p3)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (cinfo)->err->msg_parm.i[4] = (p5), \
+   (cinfo)->err->msg_parm.i[5] = (p6), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXITS(cinfo,code,str)  \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+
+#define MAKESTMT(stuff)		do { stuff } while (0)
+
+/* Nonfatal errors (we can keep going, but the data is probably corrupt) */
+#define WARNMS(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+
+/* Informational/debugging messages */
+#define TRACEMS(cinfo,lvl,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS1(cinfo,lvl,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMSS(cinfo,lvl,code,str)  \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+
+#endif /* JERROR_H */
diff --git a/cmake/externals/libjpeg/jfdctflt.c b/cmake/externals/libjpeg/jfdctflt.c
new file mode 100644
index 000000000..013f29e0c
--- /dev/null
+++ b/cmake/externals/libjpeg/jfdctflt.c
@@ -0,0 +1,176 @@
+/*
+ * jfdctflt.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * This implementation should be more accurate than either of the integer
+ * DCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ *
+ * cK represents cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
+  FAST_FLOAT *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Load data into workspace */
+    tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]));
+    tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]));
+    tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]));
+    tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]));
+    tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]));
+    tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]));
+    tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]));
+    tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]));
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jfdctfst.c b/cmake/externals/libjpeg/jfdctfst.c
new file mode 100644
index 000000000..5e4e01722
--- /dev/null
+++ b/cmake/externals/libjpeg/jfdctfst.c
@@ -0,0 +1,232 @@
+/*
+ * jfdctfst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#else
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ *
+ * cK represents cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Load data into workspace */
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+    tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jfdctint.c b/cmake/externals/libjpeg/jfdctint.c
new file mode 100644
index 000000000..05df4750b
--- /dev/null
+++ b/cmake/externals/libjpeg/jfdctint.c
@@ -0,0 +1,4415 @@
+/*
+ * jfdctint.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modification developed 2003-2018 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide FDCT routines with various input sample block sizes for
+ * direct resolution reduction or enlargement and for direct resolving the
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
+ *
+ * For N<8 we fill the remaining block coefficients with zero.
+ * For N>8 we apply a partial N-point FDCT on the input samples, computing
+ * just the lower 8 frequency coefficients and discarding the rest.
+ *
+ * We must scale the output coefficients of the N-point FDCT appropriately
+ * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
+ * is folded into the constant multipliers (pass 2) and/or final/initial
+ * shifting.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is INT32 anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "c1" should be "c6".
+     */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    dataptr[2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
+    tmp12 += z1;
+    tmp13 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "c1" should be "c6".
+     */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
+    tmp12 += z1;
+    tmp13 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#ifdef DCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform the forward DCT on a 7x7 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/14).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 7; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
+    tmp3 = GETJSAMPLE(elemptr[3]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
+
+    z1 = tmp0 + tmp2;
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
+    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
+    dataptr[4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/7)**2 = 64/49, which we fold
+   * into the constant multipliers:
+   * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 7; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
+    tmp3 = dataptr[DCTSIZE*3];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
+    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
+
+    z1 = tmp0 + tmp2;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
+	      CONST_BITS+PASS1_BITS);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x6 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11, tmp12;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
+		    CONST_BITS-PASS1_BITS);
+
+    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
+    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
+    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)**2 = 16/9, which we fold
+   * into the constant multipliers:
+   * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 5x5 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/10).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 5; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
+    tmp2 = GETJSAMPLE(elemptr[2]);
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
+    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
+    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
+	      CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/5)**2 = 64/25, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 5; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*2];
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x4 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1;
+  INT32 tmp10, tmp11;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by (8/4)**2 = 2**2, which we add here.
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
+    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
+
+    dataptr[1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-2);
+    dataptr[3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-2);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 3x3 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2**2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/6).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
+    tmp1 = GETJSAMPLE(elemptr[1]);
+
+    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
+	      CONST_BITS-PASS1_BITS-2);
+
+    /* Odd part */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
+	      CONST_BITS-PASS1_BITS-2);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/3)**2 = 64/9, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
+    tmp1 = dataptr[DCTSIZE*1];
+
+    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x2 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3;
+  JSAMPROW elemptr;
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   */
+
+  /* Row 0 */
+  elemptr = sample_data[0] + start_col;
+
+  tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
+  tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
+
+  /* Row 1 */
+  elemptr = sample_data[1] + start_col;
+
+  tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
+  tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/2)**2 = 2**4.
+   */
+
+  /* Column 0 */
+  /* Apply unsigned->signed conversion. */
+  data[DCTSIZE*0] = (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4;
+  data[DCTSIZE*1] = (tmp0 - tmp2) << 4;
+
+  /* Column 1 */
+  data[DCTSIZE*0+1] = (tmp1 + tmp3) << 4;
+  data[DCTSIZE*1+1] = (tmp1 - tmp3) << 4;
+}
+
+
+/*
+ * Perform the forward DCT on a 1x1 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  DCTELEM dcval;
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  dcval = GETJSAMPLE(sample_data[0][start_col]);
+
+  /* We leave the result scaled up by an overall factor of 8. */
+  /* We must also scale the output by (8/1)**2 = 2**6. */
+  /* Apply unsigned->signed conversion. */
+  data[0] = (dcval - CENTERJSAMPLE) << 6;
+}
+
+
+/*
+ * Perform the forward DCT on a 9x9 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2;
+  DCTELEM workspace[8];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/18).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
+    tmp4 = GETJSAMPLE(elemptr[4]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
+
+    z1 = tmp0 + tmp2 + tmp3;
+    z2 = tmp1 + tmp4;
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
+    dataptr[6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)),  /* c6 */
+	      CONST_BITS-1);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049));        /* c2 */
+    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441))    /* c4 */
+	      + z1 + z2, CONST_BITS-1);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608))    /* c8 */
+	      + z1 - z2, CONST_BITS-1);
+
+    /* Odd part */
+
+    dataptr[3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
+	      CONST_BITS-1);
+
+    tmp11 = MULTIPLY(tmp11, FIX(1.224744871));        /* c3 */
+    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
+    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
+
+    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
+
+    dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
+    dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 9)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/9)**2 = 64/81, which we partially
+   * fold into the constant multipliers and final/initial shifting:
+   * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
+    tmp4 = dataptr[DCTSIZE*4];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
+    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
+    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
+
+    z1 = tmp0 + tmp2 + tmp3;
+    z2 = tmp1 + tmp4;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)),       /* 128/81 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)),  /* c6 */
+	      CONST_BITS+2);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287));        /* c2 */
+    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190))    /* c4 */
+	      + z1 + z2, CONST_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096))    /* c8 */
+	      + z1 - z2, CONST_BITS+2);
+
+    /* Odd part */
+
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
+	      CONST_BITS+2);
+
+    tmp11 = MULTIPLY(tmp11, FIX(1.935399303));        /* c3 */
+    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
+    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
+
+    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
+
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 10x10 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  DCTELEM workspace[8*2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/20).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
+    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
+    tmp12 += tmp12;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
+	      CONST_BITS-1);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
+	      CONST_BITS-1);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
+	      CONST_BITS-1);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
+    tmp2 <<= CONST_BITS;
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
+	      CONST_BITS-1);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
+	    (tmp11 << (CONST_BITS - 1)) - tmp2;
+    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
+    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 10)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/10)**2 = 16/25, which we partially
+   * fold into the constant multipliers and final/initial shifting:
+   * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
+    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
+	      CONST_BITS+2);
+    tmp12 += tmp12;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
+	      CONST_BITS+2);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
+	      CONST_BITS+2);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
+	      CONST_BITS+2);
+    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
+	      CONST_BITS+2);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
+	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on an 11x11 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 z1, z2, z3;
+  DCTELEM workspace[8*3];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/22).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
+    tmp5 = GETJSAMPLE(elemptr[5]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
+    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
+    tmp5 += tmp5;
+    tmp0 -= tmp5;
+    tmp1 -= tmp5;
+    tmp2 -= tmp5;
+    tmp3 -= tmp5;
+    tmp4 -= tmp5;
+    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) +       /* c2 */
+	 MULTIPLY(tmp2 + tmp4, FIX(0.201263574));        /* c10 */
+    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931));        /* c6 */
+    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156));        /* c4 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
+	      - MULTIPLY(tmp4, FIX(1.390975730)),        /* c4+c10 */
+	      CONST_BITS-1);
+    dataptr[4] = (DCTELEM)
+      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
+	      - MULTIPLY(tmp2, FIX(1.356927976))         /* c2 */
+	      + MULTIPLY(tmp4, FIX(0.587485545)),        /* c8 */
+	      CONST_BITS-1);
+    dataptr[6] = (DCTELEM)
+      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
+	      - MULTIPLY(tmp2, FIX(0.788749120)),        /* c8+c10 */
+	      CONST_BITS-1);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905));    /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298));    /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576));    /* c7 */
+    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
+	   + MULTIPLY(tmp14, FIX(0.398430003));          /* c9 */
+    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576));  /* -c7 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907));  /* -c1 */
+    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
+	    - MULTIPLY(tmp14, FIX(1.068791298));         /* c5 */
+    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003));   /* c9 */
+    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
+	    + MULTIPLY(tmp14, FIX(1.399818907));         /* c1 */
+    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
+	    - MULTIPLY(tmp14, FIX(1.286413905));         /* c3 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 11)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/11)**2 = 64/121, which we partially
+   * fold into the constant multipliers and final/initial shifting:
+   * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
+    tmp5 = dataptr[DCTSIZE*5];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
+    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
+    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
+    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
+    tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
+		       FIX(1.057851240)),                /* 128/121 */
+	      CONST_BITS+2);
+    tmp5 += tmp5;
+    tmp0 -= tmp5;
+    tmp1 -= tmp5;
+    tmp2 -= tmp5;
+    tmp3 -= tmp5;
+    tmp4 -= tmp5;
+    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) +       /* c2 */
+	 MULTIPLY(tmp2 + tmp4, FIX(0.212906922));        /* c10 */
+    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713));        /* c6 */
+    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479));        /* c4 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
+	      - MULTIPLY(tmp4, FIX(1.471445400)),        /* c4+c10 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
+	      - MULTIPLY(tmp2, FIX(1.435427942))         /* c2 */
+	      + MULTIPLY(tmp4, FIX(0.621472312)),        /* c8 */
+	      CONST_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
+	      - MULTIPLY(tmp2, FIX(0.834379234)),        /* c8+c10 */
+	      CONST_BITS+2);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544));    /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199));    /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568));    /* c7 */
+    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
+	   + MULTIPLY(tmp14, FIX(0.421479672));          /* c9 */
+    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568));  /* -c7 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167));  /* -c1 */
+    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
+	    - MULTIPLY(tmp14, FIX(1.130622199));         /* c5 */
+    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672));   /* c9 */
+    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
+	    + MULTIPLY(tmp14, FIX(1.480800167));         /* c1 */
+    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
+	    - MULTIPLY(tmp14, FIX(1.360834544));         /* c3 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 12x12 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  DCTELEM workspace[8*4];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/24).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
+    dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
+	      CONST_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
+	      CONST_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 12)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/12)**2 = 4/9, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
+	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
+	      CONST_BITS+1);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 13x13 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 z1, z2;
+  DCTELEM workspace[8*5];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/26).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
+    tmp6 = GETJSAMPLE(elemptr[6]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
+    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
+    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
+    tmp6 += tmp6;
+    tmp0 -= tmp6;
+    tmp1 -= tmp6;
+    tmp2 -= tmp6;
+    tmp3 -= tmp6;
+    tmp4 -= tmp6;
+    tmp5 -= tmp6;
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) +   /* c2 */
+	      MULTIPLY(tmp1, FIX(1.058554052)) +   /* c6 */
+	      MULTIPLY(tmp2, FIX(0.501487041)) -   /* c10 */
+	      MULTIPLY(tmp3, FIX(0.170464608)) -   /* c12 */
+	      MULTIPLY(tmp4, FIX(0.803364869)) -   /* c8 */
+	      MULTIPLY(tmp5, FIX(1.252223920)),    /* c4 */
+	      CONST_BITS);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
+	 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
+	 MULTIPLY(tmp1 - tmp5, FIX(0.316450131));  /* (c8-c12)/2 */
+    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
+	 MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
+	 MULTIPLY(tmp1 + tmp5, FIX(0.486914739));  /* (c8+c12)/2 */
+
+    dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651));   /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945));   /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) +  /* c7 */
+	   MULTIPLY(tmp14 + tmp15, FIX(0.338443458));   /* c11 */
+    tmp0 = tmp1 + tmp2 + tmp3 -
+	   MULTIPLY(tmp10, FIX(2.020082300)) +          /* c3+c5+c7-c1 */
+	   MULTIPLY(tmp14, FIX(0.318774355));           /* c9-c11 */
+    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) -  /* c7 */
+	   MULTIPLY(tmp11 + tmp12, FIX(0.338443458));   /* c11 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
+    tmp1 += tmp4 + tmp5 +
+	    MULTIPLY(tmp11, FIX(0.837223564)) -         /* c5+c9+c11-c3 */
+	    MULTIPLY(tmp14, FIX(2.341699410));          /* c1+c7 */
+    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
+    tmp2 += tmp4 + tmp6 -
+	    MULTIPLY(tmp12, FIX(1.572116027)) +         /* c1+c5-c9-c11 */
+	    MULTIPLY(tmp15, FIX(2.260109708));          /* c3+c7 */
+    tmp3 += tmp5 + tmp6 +
+	    MULTIPLY(tmp13, FIX(2.205608352)) -         /* c3+c5+c9-c7 */
+	    MULTIPLY(tmp15, FIX(1.742345811));          /* c1+c11 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 13)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/13)**2 = 64/169, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
+    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
+    tmp6 = dataptr[DCTSIZE*6];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
+    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
+    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
+    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
+    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
+    tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
+		       FIX(0.757396450)),          /* 128/169 */
+	      CONST_BITS+1);
+    tmp6 += tmp6;
+    tmp0 -= tmp6;
+    tmp1 -= tmp6;
+    tmp2 -= tmp6;
+    tmp3 -= tmp6;
+    tmp4 -= tmp6;
+    tmp5 -= tmp6;
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) +   /* c2 */
+	      MULTIPLY(tmp1, FIX(0.801745081)) +   /* c6 */
+	      MULTIPLY(tmp2, FIX(0.379824504)) -   /* c10 */
+	      MULTIPLY(tmp3, FIX(0.129109289)) -   /* c12 */
+	      MULTIPLY(tmp4, FIX(0.608465700)) -   /* c8 */
+	      MULTIPLY(tmp5, FIX(0.948429952)),    /* c4 */
+	      CONST_BITS+1);
+    z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
+	 MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
+	 MULTIPLY(tmp1 - tmp5, FIX(0.239678205));  /* (c8-c12)/2 */
+    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
+	 MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
+	 MULTIPLY(tmp1 + tmp5, FIX(0.368787494));  /* (c8+c12)/2 */
+
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908));   /* c3 */
+    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751));   /* c5 */
+    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) +  /* c7 */
+	   MULTIPLY(tmp14 + tmp15, FIX(0.256335874));   /* c11 */
+    tmp0 = tmp1 + tmp2 + tmp3 -
+	   MULTIPLY(tmp10, FIX(1.530003162)) +          /* c3+c5+c7-c1 */
+	   MULTIPLY(tmp14, FIX(0.241438564));           /* c9-c11 */
+    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) -  /* c7 */
+	   MULTIPLY(tmp11 + tmp12, FIX(0.256335874));   /* c11 */
+    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
+    tmp1 += tmp4 + tmp5 +
+	    MULTIPLY(tmp11, FIX(0.634110155)) -         /* c5+c9+c11-c3 */
+	    MULTIPLY(tmp14, FIX(1.773594819));          /* c1+c7 */
+    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
+    tmp2 += tmp4 + tmp6 -
+	    MULTIPLY(tmp12, FIX(1.190715098)) +         /* c1+c5-c9-c11 */
+	    MULTIPLY(tmp15, FIX(1.711799069));          /* c3+c7 */
+    tmp3 += tmp5 + tmp6 +
+	    MULTIPLY(tmp13, FIX(1.670519935)) -         /* c3+c5+c9-c7 */
+	    MULTIPLY(tmp15, FIX(1.319646532));          /* c1+c11 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 14x14 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  DCTELEM workspace[8*6];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/28).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
+    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
+    tmp13 += tmp13;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
+	      CONST_BITS);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
+	      CONST_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
+	      CONST_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
+    tmp3 <<= CONST_BITS;
+    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
+    dataptr[5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
+	      CONST_BITS);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
+	      CONST_BITS);
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
+	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
+	      CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 14)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/14)**2 = 16/49, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
+    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+1);
+    tmp13 += tmp13;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
+	      CONST_BITS+1);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
+	      CONST_BITS+1);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+1);
+    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
+    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
+	      CONST_BITS+1);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
+	      CONST_BITS+1);
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3
+	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
+	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
+	      CONST_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 15x15 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 z1, z2, z3;
+  DCTELEM workspace[8*7];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/30).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
+    tmp7 = GETJSAMPLE(elemptr[7]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
+    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
+    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
+    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
+    tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
+
+    z1 = tmp0 + tmp4 + tmp5;
+    z2 = tmp1 + tmp3 + tmp6;
+    z3 = tmp2 + tmp7;
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
+    z3 += z3;
+    dataptr[6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
+	      MULTIPLY(z2 - z3, FIX(0.437016024)),  /* c12 */
+	      CONST_BITS);
+    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
+    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) -  /* c2+c14 */
+         MULTIPLY(tmp6 - tmp2, FIX(2.238241955));   /* c4+c8 */
+    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) -  /* c8-c14 */
+	 MULTIPLY(tmp0 - tmp2, FIX(0.091361227));   /* c2-c4 */
+    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) +  /* c2 */
+	 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) +  /* c8 */
+	 MULTIPLY(tmp1 - tmp4, FIX(0.790569415));   /* (c6+c12)/2 */
+
+    dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
+    dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
+
+    /* Odd part */
+
+    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
+		    FIX(1.224744871));                         /* c5 */
+    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
+	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876));  /* c9 */
+    tmp12 = MULTIPLY(tmp12, FIX(1.224744871));                 /* c5 */
+    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) +         /* c1 */
+	   MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) +         /* c3 */
+	   MULTIPLY(tmp13 + tmp15, FIX(0.575212477));          /* c11 */
+    tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) -                 /* c7-c11 */
+	   MULTIPLY(tmp14, FIX(0.513743148)) +                 /* c3-c9 */
+	   MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12;   /* c1+c13 */
+    tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) -               /* -(c1-c7) */
+	   MULTIPLY(tmp11, FIX(2.176250899)) -                 /* c3+c9 */
+	   MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12;   /* c11+c13 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 15)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/15)**2 = 64/225, which we partially
+   * fold into the constant multipliers and final shifting:
+   * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
+    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
+    tmp7 = dataptr[DCTSIZE*7];
+
+    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
+    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
+    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
+    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
+    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
+    tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
+    tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
+
+    z1 = tmp0 + tmp4 + tmp5;
+    z2 = tmp1 + tmp3 + tmp6;
+    z3 = tmp2 + tmp7;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
+	      CONST_BITS+2);
+    z3 += z3;
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
+	      MULTIPLY(z2 - z3, FIX(0.497227121)),  /* c12 */
+	      CONST_BITS+2);
+    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
+    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) -  /* c2+c14 */
+         MULTIPLY(tmp6 - tmp2, FIX(2.546621957));   /* c4+c8 */
+    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) -  /* c8-c14 */
+	 MULTIPLY(tmp0 - tmp2, FIX(0.103948774));   /* c2-c4 */
+    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) +  /* c2 */
+	 MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) +  /* c8 */
+	 MULTIPLY(tmp1 - tmp4, FIX(0.899492312));   /* (c6+c12)/2 */
+
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
+
+    /* Odd part */
+
+    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
+		    FIX(1.393487498));                         /* c5 */
+    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
+	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187));  /* c9 */
+    tmp12 = MULTIPLY(tmp12, FIX(1.393487498));                 /* c5 */
+    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) +         /* c1 */
+	   MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) +         /* c3 */
+	   MULTIPLY(tmp13 + tmp15, FIX(0.654463974));          /* c11 */
+    tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) -                 /* c7-c11 */
+	   MULTIPLY(tmp14, FIX(0.584525538)) +                 /* c3-c9 */
+	   MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12;   /* c1+c13 */
+    tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) -               /* -(c1-c7) */
+	   MULTIPLY(tmp11, FIX(2.476089912)) -                 /* c3+c9 */
+	   MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12;   /* c11+c13 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 16x16 sample block.
+ */
+
+GLOBAL(void)
+jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+  DCTELEM workspace[DCTSIZE2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS-PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == DCTSIZE * 2)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/16)**2 = 1/2**2.
+   * cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS+PASS1_BITS+2);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+10 */
+	      CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS+PASS1_BITS+2);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 16x8 sample block.
+ *
+ * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
+    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS-PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by 8/16 = 1/2.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "c1" should be "c6".
+     */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+	      CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
+    tmp12 += z1;
+    tmp13 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 14x7 sample block.
+ *
+ * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 z1, z2, z3;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero bottom row of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 7; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
+    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
+    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp13 += tmp13;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
+	      CONST_BITS-PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
+
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
+    tmp3 <<= CONST_BITS;
+    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
+    dataptr[5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
+	      CONST_BITS-PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
+	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
+	      CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/14)*(8/7) = 32/49, which we
+   * partially fold into the constant multipliers and final shifting:
+   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
+    tmp3 = dataptr[DCTSIZE*3];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
+    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
+
+    z1 = tmp0 + tmp2;
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
+	      CONST_BITS+PASS1_BITS+1);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 12x6 sample block.
+ *
+ * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero 2 bottom rows of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
+    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/12)*(8/6) = 8/9, which we
+   * partially fold into the constant multipliers and final shifting:
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+	      CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 10x5 sample block.
+ *
+ * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero 3 bottom rows of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 5; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
+    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
+    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp12 += tmp12;
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
+	      CONST_BITS-PASS1_BITS);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
+    tmp2 <<= CONST_BITS;
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
+	      CONST_BITS-PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
+	    (tmp11 << (CONST_BITS - 1)) - tmp2;
+    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/10)*(8/5) = 32/25, which we
+   * fold into the constant multipliers:
+   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
+    tmp2 = dataptr[DCTSIZE*2];
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on an 8x4 sample block.
+ *
+ * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Zero 4 bottom rows of output coefficient block. */
+  MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by 8/4 = 2, which we add here.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "c1" should be "c6".
+     */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
+
+    dataptr[2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-1);
+    dataptr[6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
+
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
+    tmp12 += z1;
+    tmp13 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
+    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
+    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x3 sample block.
+ *
+ * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11, tmp12;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
+	      CONST_BITS-PASS1_BITS-1);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part */
+
+    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
+		    CONST_BITS-PASS1_BITS-1);
+
+    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
+    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
+    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
+    tmp1 = dataptr[DCTSIZE*1];
+
+    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x2 sample block.
+ *
+ * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  DCTELEM tmp0, tmp2, tmp10, tmp12, tmp4, tmp5;
+  INT32 tmp1, tmp3, tmp11, tmp13;
+  INT32 z1, z2, z3;
+  JSAMPROW elemptr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
+
+  /* Row 0 */
+  elemptr = sample_data[0] + start_col;
+
+  /* Even part */
+
+  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+  tmp0 = tmp4 + tmp5;
+  tmp2 = tmp4 - tmp5;
+
+  /* Odd part */
+
+  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
+  /* Add fudge factor here for final descale. */
+  z1 += ONE << (CONST_BITS-3-1);
+  tmp1 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+  /* Row 1 */
+  elemptr = sample_data[1] + start_col;
+
+  /* Even part */
+
+  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+  tmp10 = tmp4 + tmp5;
+  tmp12 = tmp4 - tmp5;
+
+  /* Odd part */
+
+  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
+  tmp11 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+  tmp13 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/4)*(8/2) = 2**3.
+   */
+
+  /* Column 0 */
+  /* Apply unsigned->signed conversion. */
+  data[DCTSIZE*0] = (tmp0 + tmp10 - 8 * CENTERJSAMPLE) << 3;
+  data[DCTSIZE*1] = (tmp0 - tmp10) << 3;
+
+  /* Column 1 */
+  data[DCTSIZE*0+1] = (DCTELEM) RIGHT_SHIFT(tmp1 + tmp11, CONST_BITS-3);
+  data[DCTSIZE*1+1] = (DCTELEM) RIGHT_SHIFT(tmp1 - tmp11, CONST_BITS-3);
+
+  /* Column 2 */
+  data[DCTSIZE*0+2] = (tmp2 + tmp12) << 3;
+  data[DCTSIZE*1+2] = (tmp2 - tmp12) << 3;
+
+  /* Column 3 */
+  data[DCTSIZE*0+3] = (DCTELEM) RIGHT_SHIFT(tmp3 + tmp13, CONST_BITS-3);
+  data[DCTSIZE*1+3] = (DCTELEM) RIGHT_SHIFT(tmp3 - tmp13, CONST_BITS-3);
+}
+
+
+/*
+ * Perform the forward DCT on a 2x1 sample block.
+ *
+ * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  DCTELEM tmp0, tmp1;
+  JSAMPROW elemptr;
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  elemptr = sample_data[0] + start_col;
+
+  tmp0 = GETJSAMPLE(elemptr[0]);
+  tmp1 = GETJSAMPLE(elemptr[1]);
+
+  /* We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/2)*(8/1) = 2**5.
+   */
+
+  /* Even part */
+
+  /* Apply unsigned->signed conversion. */
+  data[0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
+
+  /* Odd part */
+
+  data[1] = (tmp0 - tmp1) << 5;
+}
+
+
+/*
+ * Perform the forward DCT on an 8x16 sample block.
+ *
+ * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
+  INT32 z1;
+  DCTELEM workspace[DCTSIZE2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "c1" should be "c6".
+     */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
+
+    tmp10 = tmp0 + tmp3;
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
+    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
+    tmp12 += z1;
+    tmp13 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == DCTSIZE * 2)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by 8/16 = 1/2.
+   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
+
+    tmp10 = tmp0 + tmp7;
+    tmp14 = tmp0 - tmp7;
+    tmp11 = tmp1 + tmp6;
+    tmp15 = tmp1 - tmp6;
+    tmp12 = tmp2 + tmp5;
+    tmp16 = tmp2 - tmp5;
+    tmp13 = tmp3 + tmp4;
+    tmp17 = tmp3 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
+    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
+    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
+	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
+	      CONST_BITS+PASS1_BITS+1);
+
+    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
+	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
+	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
+	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
+	      CONST_BITS+PASS1_BITS+1);
+
+    /* Odd part */
+
+    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
+	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
+	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
+	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
+    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
+	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
+    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
+	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
+    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
+	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
+	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
+    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
+	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
+    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
+	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
+    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
+	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 7x14 sample block.
+ *
+ * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 z1, z2, z3;
+  DCTELEM workspace[8*6];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
+    tmp3 = GETJSAMPLE(elemptr[3]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
+    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
+
+    z1 = tmp0 + tmp2;
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp3 += tmp3;
+    z1 -= tmp3;
+    z1 -= tmp3;
+    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
+    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
+    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
+    z1 -= z2;
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
+    dataptr[4] = (DCTELEM)
+      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
+    tmp1 += tmp2;
+    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
+    tmp0 += tmp3;
+    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 14)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/7)*(8/14) = 32/49, which we
+   * fold into the constant multipliers:
+   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
+    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+
+    tmp10 = tmp0 + tmp6;
+    tmp14 = tmp0 - tmp6;
+    tmp11 = tmp1 + tmp5;
+    tmp15 = tmp1 - tmp5;
+    tmp12 = tmp2 + tmp4;
+    tmp16 = tmp2 - tmp4;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
+    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
+    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
+    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+PASS1_BITS);
+    tmp13 += tmp13;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
+	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
+	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
+	      CONST_BITS+PASS1_BITS);
+
+    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
+	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
+	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp1 + tmp2;
+    tmp11 = tmp5 - tmp4;
+    dataptr[DCTSIZE*7] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
+		       FIX(0.653061224)),                 /* 32/49 */
+	      CONST_BITS+PASS1_BITS);
+    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
+    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
+    tmp10 += tmp11 - tmp3;
+    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
+	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
+	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
+	      CONST_BITS+PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
+	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
+	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp11 + tmp12 + tmp3
+	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
+	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 6x12 sample block.
+ *
+ * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  DCTELEM workspace[8*4];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
+    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
+    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
+	      CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
+		    CONST_BITS-PASS1_BITS);
+
+    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
+    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
+    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 12)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)*(8/12) = 8/9, which we
+   * fold into the constant multipliers:
+   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
+
+    tmp10 = tmp0 + tmp5;
+    tmp13 = tmp0 - tmp5;
+    tmp11 = tmp1 + tmp4;
+    tmp14 = tmp1 - tmp4;
+    tmp12 = tmp2 + tmp3;
+    tmp15 = tmp2 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
+    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
+    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
+    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
+	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
+    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
+    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
+    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
+    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
+    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
+	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
+    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
+    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
+	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
+    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
+	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
+    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
+	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 5x10 sample block.
+ *
+ * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  DCTELEM workspace[8*2];
+  DCTELEM *dataptr;
+  DCTELEM *wsptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
+
+  dataptr = data;
+  ctr = 0;
+  for (;;) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
+    tmp2 = GETJSAMPLE(elemptr[2]);
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+
+    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
+    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
+    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
+    tmp10 -= tmp2 << 2;
+    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
+    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
+	      CONST_BITS-PASS1_BITS);
+
+    ctr++;
+
+    if (ctr != DCTSIZE) {
+      if (ctr == 10)
+	break;			/* Done. */
+      dataptr += DCTSIZE;	/* advance pointer to next row */
+    } else
+      dataptr = workspace;	/* switch pointer to extended workspace */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/5)*(8/10) = 32/25, which we
+   * fold into the constant multipliers:
+   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
+   */
+
+  dataptr = data;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
+    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+
+    tmp10 = tmp0 + tmp4;
+    tmp13 = tmp0 - tmp4;
+    tmp11 = tmp1 + tmp3;
+    tmp14 = tmp1 - tmp3;
+
+    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
+    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
+    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp12 += tmp12;
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
+	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
+	      CONST_BITS+PASS1_BITS);
+    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = tmp0 + tmp4;
+    tmp11 = tmp1 - tmp3;
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
+	      CONST_BITS+PASS1_BITS);
+    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
+	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
+	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
+	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
+	      CONST_BITS+PASS1_BITS);
+    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
+	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
+    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
+	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+    wsptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 4x8 sample block.
+ *
+ * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by 8/4 = 2, which we add here.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
+    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
+
+    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
+    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
+
+    dataptr[1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "c1" should be "c6".
+     */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
+    tmp12 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp13 = tmp1 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * i0..i3 in the paper are tmp0..tmp3 here.
+     */
+
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
+    tmp12 += z1;
+    tmp13 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 3x6 sample block.
+ *
+ * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1, tmp2;
+  INT32 tmp10, tmp11, tmp12;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 6; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
+    tmp1 = GETJSAMPLE(elemptr[1]);
+
+    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM)
+      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
+    dataptr[2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    /* Odd part */
+
+    dataptr[1] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
+	      CONST_BITS-PASS1_BITS-1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
+   * fold into the constant multipliers (other part was done in pass 1):
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 3; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
+    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
+    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
+    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+
+    dataptr[DCTSIZE*0] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
+	      CONST_BITS+PASS1_BITS);
+
+    /* Odd part */
+
+    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM)
+      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
+	      CONST_BITS+PASS1_BITS);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 2x4 sample block.
+ *
+ * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  INT32 tmp0, tmp1;
+  INT32 tmp10, tmp11;
+  DCTELEM *dataptr;
+  JSAMPROW elemptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 4; ctr++) {
+    elemptr = sample_data[ctr] + start_col;
+
+    /* Even part */
+
+    tmp0 = GETJSAMPLE(elemptr[0]);
+    tmp1 = GETJSAMPLE(elemptr[1]);
+
+    /* Apply unsigned->signed conversion. */
+    dataptr[0] = (DCTELEM) (tmp0 + tmp1 - 2 * CENTERJSAMPLE);
+
+    /* Odd part */
+
+    dataptr[1] = (DCTELEM) (tmp0 - tmp1);
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/2)*(8/4) = 2**3.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
+
+  dataptr = data;
+  for (ctr = 0; ctr < 2; ctr++) {
+    /* Even part */
+
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
+
+    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
+    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
+
+    dataptr[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1) << 3);
+    dataptr[DCTSIZE*2] = (DCTELEM) ((tmp0 - tmp1) << 3);
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-3-1);
+
+    dataptr[DCTSIZE*1] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-3);
+    dataptr[DCTSIZE*3] = (DCTELEM)
+      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-3);
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+
+/*
+ * Perform the forward DCT on a 1x2 sample block.
+ *
+ * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
+ */
+
+GLOBAL(void)
+jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
+{
+  DCTELEM tmp0, tmp1;
+
+  /* Pre-zero output coefficient block. */
+  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
+
+  /* Pass 1: empty. */
+
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
+   * We must also scale the output by (8/1)*(8/2) = 2**5.
+   */
+
+  /* Even part */
+
+  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
+  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+
+  /* Apply unsigned->signed conversion. */
+  data[DCTSIZE*0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
+
+  /* Odd part */
+
+  data[DCTSIZE*1] = (tmp0 - tmp1) << 5;
+}
+
+#endif /* DCT_SCALING_SUPPORTED */
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jidctflt.c b/cmake/externals/libjpeg/jidctflt.c
new file mode 100644
index 000000000..e33a2b5e4
--- /dev/null
+++ b/cmake/externals/libjpeg/jidctflt.c
@@ -0,0 +1,238 @@
+/*
+ * jidctflt.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2010-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a float result.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * cK represents cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  FLOAT_MULT_TYPE * quantptr;
+  FAST_FLOAT * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    wsptr[DCTSIZE*0] = tmp0 + tmp7;
+    wsptr[DCTSIZE*7] = tmp0 - tmp7;
+    wsptr[DCTSIZE*1] = tmp1 + tmp6;
+    wsptr[DCTSIZE*6] = tmp1 - tmp6;
+    wsptr[DCTSIZE*2] = tmp2 + tmp5;
+    wsptr[DCTSIZE*5] = tmp2 - tmp5;
+    wsptr[DCTSIZE*3] = tmp3 + tmp4;
+    wsptr[DCTSIZE*4] = tmp3 - tmp4;
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * And testing floats for zero is relatively expensive, so we don't bother.
+     */
+
+    /* Even part */
+
+    /* Prepare range-limit and float->int conversion */
+    z5 = wsptr[0] + (((FAST_FLOAT) RANGE_CENTER) + ((FAST_FLOAT) 0.5));
+    tmp10 = z5 + wsptr[4];
+    tmp11 = z5 - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = (wsptr[2] - wsptr[6]) *
+	      ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    /* Final output stage: float->int conversion and range-limit */
+
+    outptr[0] = range_limit[(int) (tmp0 + tmp7) & RANGE_MASK];
+    outptr[7] = range_limit[(int) (tmp0 - tmp7) & RANGE_MASK];
+    outptr[1] = range_limit[(int) (tmp1 + tmp6) & RANGE_MASK];
+    outptr[6] = range_limit[(int) (tmp1 - tmp6) & RANGE_MASK];
+    outptr[2] = range_limit[(int) (tmp2 + tmp5) & RANGE_MASK];
+    outptr[5] = range_limit[(int) (tmp2 - tmp5) & RANGE_MASK];
+    outptr[3] = range_limit[(int) (tmp3 + tmp4) & RANGE_MASK];
+    outptr[4] = range_limit[(int) (tmp3 - tmp4) & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jidctfst.c b/cmake/externals/libjpeg/jidctfst.c
new file mode 100644
index 000000000..1ac3e39cb
--- /dev/null
+++ b/cmake/externals/libjpeg/jidctfst.c
@@ -0,0 +1,351 @@
+/*
+ * jidctfst.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2015-2017 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jidctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * The dequantized coefficients are not integers because the AA&N scaling
+ * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
+ * so that the first and second IDCT rounds have the same input scaling.
+ * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * avoid a descaling shift; this compromises accuracy rather drastically
+ * for small quantization table entries, but it saves a lot of shifts.
+ * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
+ * so we use a much larger scaling factor to preserve accuracy.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  8
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  8
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#else
+#define FIX_1_082392200  FIX(1.082392200)
+#define FIX_1_414213562  FIX(1.414213562)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_613125930  FIX(2.613125930)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
+ * multiplication will do.  For 12-bit data, the multiplier table is
+ * declared INT32, so a 32-bit multiply will be used.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
+#else
+#define DEQUANTIZE(coef,quantval)  \
+	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+#endif
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * cK represents cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  IFAST_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS			/* for DESCALE */
+  ISHIFT_TEMPS			/* for IRIGHT_SHIFT */
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = z5 - MULTIPLY(z12, FIX_1_082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - MULTIPLY(z10, FIX_2_613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
+    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+    wsptr[DCTSIZE*3] = (int) (tmp3 + tmp4);
+    wsptr[DCTSIZE*4] = (int) (tmp3 - tmp4);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z5 = (DCTELEM) wsptr[0] +
+	   ((((DCTELEM) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (1 << (PASS1_BITS+2)));
+
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) IRIGHT_SHIFT(z5, PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+
+    tmp10 = z5 + (DCTELEM) wsptr[4];
+    tmp11 = z5 - (DCTELEM) wsptr[4];
+
+    tmp13 = (DCTELEM) wsptr[2] + (DCTELEM) wsptr[6];
+    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6],
+		     FIX_1_414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
+    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
+    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
+    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = z5 - MULTIPLY(z12, FIX_1_082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - MULTIPLY(z10, FIX_2_613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+
+    outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp1 + tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) IRIGHT_SHIFT(tmp1 - tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) IRIGHT_SHIFT(tmp3 + tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) IRIGHT_SHIFT(tmp3 - tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jidctint.c b/cmake/externals/libjpeg/jidctint.c
new file mode 100644
index 000000000..e30ec8c8a
--- /dev/null
+++ b/cmake/externals/libjpeg/jidctint.c
@@ -0,0 +1,5240 @@
+/*
+ * jidctint.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modification developed 2002-2018 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide IDCT routines with various output sample block sizes for
+ * direct resolution reduction or enlargement and for direct resolving the
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
+ *
+ * For N<8 we simply take the corresponding low-frequency coefficients of
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
+ * to yield the downscaled outputs.
+ * This can be seen as direct low-pass downsampling from the DCT domain
+ * point of view rather than the usual spatial domain point of view,
+ * yielding significant computational savings and results at least
+ * as good as common bilinear (averaging) spatial downsampling.
+ *
+ * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
+ * lower frequencies and higher frequencies assumed to be zero.
+ * It turns out that the computational effort is similar to the 8x8 IDCT
+ * regarding the output size.
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate INT32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/16).
+ */
+
+GLOBAL(void)
+jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
+				  & RANGE_MASK];
+
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 7x7 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*7];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp13 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp13 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp13 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 7;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x6 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*6];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) (tmp11 + tmp1);
+    wsptr[6*4] = (int) (tmp11 - tmp1);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 5x5 output block.
+ *
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/10).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[5*5];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp12 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp12 <<= CONST_BITS;
+    tmp0 = (INT32) wsptr[2];
+    tmp1 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 5;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 4x4 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+ */
+
+GLOBAL(void)
+jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[4*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    
+    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
+    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
+		       CONST_BITS-PASS1_BITS);
+    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
+		       CONST_BITS-PASS1_BITS);
+
+    /* Final output stage */
+
+    wsptr[4*0] = (int) (tmp10 + tmp0);
+    wsptr[4*3] = (int) (tmp10 - tmp0);
+    wsptr[4*1] = (int) (tmp12 + tmp2);
+    wsptr[4*2] = (int) (tmp12 - tmp2);
+  }
+
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp2 = (INT32) wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x3 output block.
+ *
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/6).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[3*3];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 3 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 3;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 2x2 output block.
+ *
+ * Multiplication-less algorithm.
+ */
+
+GLOBAL(void)
+jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  ISHIFT_TEMPS
+
+  /* Pass 1: process columns from input. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
+  /* Column 0 */
+  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+  /* Add range center and fudge factor for final descale and range-limit. */
+  tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+
+  tmp0 = tmp4 + tmp5;
+  tmp2 = tmp4 - tmp5;
+
+  /* Column 1 */
+  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
+  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
+
+  tmp1 = tmp4 + tmp5;
+  tmp3 = tmp4 - tmp5;
+
+  /* Pass 2: process 2 rows, store into output array. */
+
+  /* Row 0 */
+  outptr = output_buf[0] + output_col;
+
+  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+
+  /* Row 1 */
+  outptr = output_buf[1] + output_col;
+
+  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 1x1 output block.
+ *
+ * We hardly need an inverse DCT routine for this: just take the
+ * average pixel value, which is one-eighth of the DC coefficient.
+ */
+
+GLOBAL(void)
+jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM dcval;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  ISHIFT_TEMPS
+
+  /* 1x1 is trivial: just take the DC coefficient divided by 8. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
+  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
+  /* Add range center and fudge factor for descale and range-limit. */
+  dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+
+  output_buf[0][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 9x9 output block.
+ *
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ */
+
+GLOBAL(void)
+jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*9];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 9 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 9; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x10 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*10];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS;
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) (tmp22 + tmp12);
+    wsptr[8*7] = (int) (tmp22 - tmp12);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 11x11 output block.
+ *
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ */
+
+GLOBAL(void)
+jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*11];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 11 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 11; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp10 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x12 output block.
+ *
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*12];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 13x13 output block.
+ *
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ */
+
+GLOBAL(void)
+jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*13];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 13 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 13; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x14 output block.
+ *
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*14];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) (tmp23 + tmp13);
+    wsptr[8*10] = (int) (tmp23 - tmp13);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 15x15 output block.
+ *
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ */
+
+GLOBAL(void)
+jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*15];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 15 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 15; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[5];
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = (INT32) wsptr[7];
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x16 output block.
+ *
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*16];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 16 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x8 output block.
+ *
+ * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*8];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process 8 rows from work array, store into output array.
+   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x7 output block.
+ *
+ * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*7];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp23 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
+    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
+    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp10 = z1 + z3;
+    z2 -= tmp10;
+    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
+    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
+    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
+    tmp10 = tmp11 - tmp12;
+    tmp11 += tmp12;
+    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
+    tmp11 += tmp12;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
+    tmp10 += z2;
+    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array.
+   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z1 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z1 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x6 output block.
+ *
+ * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*6];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
+    tmp11 = tmp10 + tmp20;
+    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
+    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
+    tmp20 = tmp11 + tmp10;
+    tmp22 = tmp11 - tmp10;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
+    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
+    tmp11 = (z1 - z2 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) (tmp21 + tmp11);
+    wsptr[8*4] = (int) (tmp21 - tmp11);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array.
+   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+					       CONST_BITS+PASS1_BITS+3)
+			     & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x5 output block.
+ *
+ * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*5];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
+    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
+    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array.
+   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z3 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 8;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 8x4 output block.
+ *
+ * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+
+    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
+    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
+		       CONST_BITS-PASS1_BITS);
+    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
+		       CONST_BITS-PASS1_BITS);
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) (tmp10 + tmp0);
+    wsptr[8*3] = (int) (tmp10 - tmp0);
+    wsptr[8*1] = (int) (tmp12 + tmp2);
+    wsptr[8*2] = (int) (tmp12 - tmp2);
+  }
+
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 6x3 output block.
+ *
+ * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*3];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+  
+  /* Pass 2: process 3 rows from work array, store into output array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 4x2 output block.
+ *
+ * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  INT32 * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  INT32 workspace[4*2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+    /* Odd part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    /* Final output stage */
+
+    wsptr[4*0] = tmp10 + tmp0;
+    wsptr[4*1] = tmp10 - tmp0;
+  }
+
+  /* Pass 2: process 2 rows from work array, store into output array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
+    tmp2 = wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = wsptr[1];
+    z3 = wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 2x1 output block.
+ *
+ * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  ISHIFT_TEMPS
+
+  /* Pass 1: empty. */
+
+  /* Pass 2: process 1 row from input, store into output array. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  outptr = output_buf[0] + output_col;
+
+  /* Even part */
+
+  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
+  /* Add range center and fudge factor for final descale and range-limit. */
+  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+
+  /* Odd part */
+
+  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
+
+  /* Final output stage */
+
+  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 8x16 output block.
+ *
+ * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*16];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    z2 = (INT32) wsptr[0] +
+	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	    (ONE << (PASS1_BITS+2)));
+    z3 = (INT32) wsptr[4];
+
+    tmp0 = (z2 + z3) << CONST_BITS;
+    tmp1 = (z2 - z3) << CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 7x14 output block.
+ *
+ * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*14];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[7*3]  = (int) (tmp23 + tmp13);
+    wsptr[7*10] = (int) (tmp23 - tmp13);
+    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array.
+   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp23 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp23 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
+    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
+    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp10 = z1 + z3;
+    z2 -= tmp10;
+    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
+    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
+    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
+    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
+    tmp10 = tmp11 - tmp12;
+    tmp11 += tmp12;
+    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
+    tmp11 += tmp12;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
+    tmp10 += z2;
+    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 7;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 6x12 output block.
+ *
+ * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*12];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp10 <<= CONST_BITS;
+    tmp12 = (INT32) wsptr[4];
+    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
+    tmp11 = tmp10 + tmp20;
+    tmp21 = tmp10 - tmp20 - tmp20;
+    tmp20 = (INT32) wsptr[2];
+    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
+    tmp20 = tmp11 + tmp10;
+    tmp22 = tmp11 - tmp10;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
+    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
+    tmp11 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 6;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 5x10 output block.
+ *
+ * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[5*10];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+			CONST_BITS-PASS1_BITS);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS;
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) (tmp22 + tmp12);
+    wsptr[5*7] = (int) (tmp22 - tmp12);
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array.
+   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp12 = (INT32) wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	       (ONE << (PASS1_BITS+2)));
+    tmp12 <<= CONST_BITS;
+    tmp13 = (INT32) wsptr[2];
+    tmp14 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
+    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
+    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 5;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 4x8 output block.
+ *
+ * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[4*8];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 4; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+
+      wsptr[4*0] = dcval;
+      wsptr[4*1] = dcval;
+      wsptr[4*2] = dcval;
+      wsptr[4*3] = dcval;
+      wsptr[4*4] = dcval;
+      wsptr[4*5] = dcval;
+      wsptr[4*6] = dcval;
+      wsptr[4*7] = dcval;
+
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 <<= CONST_BITS;
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    tmp0 = z2 + z3;
+    tmp1 = z2 - z3;
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
+    tmp10 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+    tmp11 = tmp1 + tmp3;
+    tmp12 = tmp1 - tmp3;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+    z2 = tmp0 + tmp2;
+    z3 = tmp1 + tmp3;
+
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
+    z2 += z1;
+    z3 += z1;
+
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
+    tmp0 += z1 + z2;
+    tmp3 += z1 + z3;
+
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
+    tmp1 += z1 + z3;
+    tmp2 += z1 + z2;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process 8 rows from work array, store into output array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp2 = (INT32) wsptr[2];
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 4;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 3x6 output block.
+ *
+ * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[3*6];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) (tmp11 + tmp1);
+    wsptr[3*4] = (int) (tmp11 - tmp1);
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array.
+   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp0 = (INT32) wsptr[0] +
+	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
+	      (ONE << (PASS1_BITS+2)));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+					      CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 3;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 2x4 output block.
+ *
+ * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  INT32 * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  INT32 workspace[2*4];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array.
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
+   */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+
+    tmp10 = (tmp0 + tmp2) << CONST_BITS;
+    tmp12 = (tmp0 - tmp2) << CONST_BITS;
+
+    /* Odd part */
+    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
+    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
+    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
+
+    /* Final output stage */
+
+    wsptr[2*0] = tmp10 + tmp0;
+    wsptr[2*3] = tmp10 - tmp0;
+    wsptr[2*1] = tmp12 + tmp2;
+    wsptr[2*2] = tmp12 - tmp2;
+  }
+
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add range center and fudge factor for final descale and range-limit. */
+    tmp10 = wsptr[0] +
+	      ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
+	       (ONE << (CONST_BITS+2)));
+
+    /* Odd part */
+
+    tmp0 = wsptr[1];
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += 2;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 1x2 output block.
+ *
+ * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
+ */
+
+GLOBAL(void)
+jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  ISHIFT_TEMPS
+
+  /* Process 1 column from input, store into output array. */
+
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+
+  /* Even part */
+
+  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+  /* Add range center and fudge factor for final descale and range-limit. */
+  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
+
+  /* Odd part */
+
+  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+
+  /* Final output stage */
+
+  output_buf[0][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  output_buf[1][output_col] =
+    range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jinclude.h b/cmake/externals/libjpeg/jinclude.h
new file mode 100644
index 000000000..12ea8cd2f
--- /dev/null
+++ b/cmake/externals/libjpeg/jinclude.h
@@ -0,0 +1,157 @@
+/*
+ * jinclude.h
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * Modified 2017-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file exists to provide a single place to fix any problems with
+ * including the wrong system include files.  (Common problems are taken
+ * care of by the standard jconfig symbols, but on really weird systems
+ * you may have to edit this file.)
+ *
+ * NOTE: this file is NOT intended to be included by applications using
+ * the JPEG library.  Most applications need only include jpeglib.h.
+ */
+
+
+/* Include auto-config file to find out which system include files we need. */
+
+#include "jconfig.h"		/* auto configuration options */
+#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+
+/*
+ * We need the NULL macro and size_t typedef.
+ * On an ANSI-conforming system it is sufficient to include <stddef.h>.
+ * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
+ * pull in <sys/types.h> as well.
+ * Note that the core JPEG library does not require <stdio.h>;
+ * only the default error handler and data source/destination modules do.
+ * But we must pull it in because of the references to FILE in jpeglib.h.
+ * You can remove those references if you want to compile without <stdio.h>.
+ */
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef NEED_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+
+/*
+ * We need memory copying and zeroing functions, plus strncpy().
+ * ANSI and System V implementations declare these in <string.h>.
+ * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
+ * Some systems may declare memset and memcpy in <memory.h>.
+ *
+ * NOTE: we assume the size parameters to these functions are of type size_t.
+ * Change the casts in these macros if not!
+ */
+
+#ifdef NEED_BSD_STRINGS
+
+#include <strings.h>
+#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+
+#else /* not BSD, assume ANSI/SysV string lib */
+
+#include <string.h>
+#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+
+#endif
+
+/*
+ * In ANSI C, and indeed any rational implementation, size_t is also the
+ * type returned by sizeof().  However, it seems there are some irrational
+ * implementations out there, in which sizeof() returns an int even though
+ * size_t is defined as long or unsigned long.  To ensure consistent results
+ * we always use this SIZEOF() macro in place of using sizeof() directly.
+ */
+
+#define SIZEOF(object)	((size_t) sizeof(object))
+
+/*
+ * The modules that use fread() and fwrite() always invoke them through
+ * these macros.  On some systems you may need to twiddle the argument casts.
+ * CAUTION: argument order is different from underlying functions!
+ *
+ * Furthermore, macros are provided for fflush() and ferror() in order
+ * to facilitate adaption by applications using an own FILE class.
+ *
+ * You can define your own custom file I/O functions in jconfig.h and
+ * #define JPEG_HAVE_FILE_IO_CUSTOM there to prevent redefinition here.
+ *
+ * You can #define JPEG_USE_FILE_IO_CUSTOM in jconfig.h to use custom file
+ * I/O functions implemented in Delphi VCL (Visual Component Library)
+ * in Vcl.Imaging.jpeg.pas for the TJPEGImage component utilizing
+ * the Delphi RTL (Run-Time Library) TMemoryStream component:
+ *
+ *   procedure jpeg_stdio_src(var cinfo: jpeg_decompress_struct;
+ *     input_file: TStream); external;
+ *
+ *   procedure jpeg_stdio_dest(var cinfo: jpeg_compress_struct;
+ *     output_file: TStream); external;
+ *
+ *   function jfread(var buf; recsize, reccount: Integer; S: TStream): Integer;
+ *   begin
+ *     Result := S.Read(buf, recsize * reccount);
+ *   end;
+ *
+ *   function jfwrite(const buf; recsize, reccount: Integer; S: TStream): Integer;
+ *   begin
+ *     Result := S.Write(buf, recsize * reccount);
+ *   end;
+ *
+ *   function jfflush(S: TStream): Integer;
+ *   begin
+ *     Result := 0;
+ *   end;
+ *
+ *   function jferror(S: TStream): Integer;
+ *   begin
+ *     Result := 0;
+ *   end;
+ *
+ * TMemoryStream of Delphi RTL has the distinctive feature to provide dynamic
+ * memory buffer management with a file/stream-based interface, particularly for
+ * the write (output) operation, which is easier to apply compared with direct
+ * implementations as given in jdatadst.c for memory destination.  Those direct
+ * implementations of dynamic memory write tend to be more difficult to use,
+ * so providing an option like TMemoryStream may be a useful alternative.
+ *
+ * The CFile/CMemFile classes of the Microsoft Foundation Class (MFC) Library
+ * may be used in a similar fashion.
+ */
+
+#ifndef JPEG_HAVE_FILE_IO_CUSTOM
+#ifdef JPEG_USE_FILE_IO_CUSTOM
+extern size_t jfread(void * __ptr, size_t __size, size_t __n, FILE * __stream);
+extern size_t jfwrite(const void * __ptr, size_t __size, size_t __n, FILE * __stream);
+extern int    jfflush(FILE * __stream);
+extern int    jferror(FILE * __fp);
+
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) jfread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) jfwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFFLUSH(file)	jfflush(file)
+#define JFERROR(file)	jferror(file)
+#else
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFFLUSH(file)	fflush(file)
+#define JFERROR(file)	ferror(file)
+#endif
+#endif
diff --git a/cmake/externals/libjpeg/jmemansi.c b/cmake/externals/libjpeg/jmemansi.c
new file mode 100644
index 000000000..2d93e4962
--- /dev/null
+++ b/cmake/externals/libjpeg/jmemansi.c
@@ -0,0 +1,167 @@
+/*
+ * jmemansi.c
+ *
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides a simple generic implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that you have the ANSI-standard library routine tmpfile().
+ * Also, the problem of determining the amount of memory available
+ * is shoved onto the user.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+#ifndef SEEK_SET		/* pre-ANSI systems may not define this; */
+#define SEEK_SET  0		/* if not, assume 0 is correct */
+#endif
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().
+ */
+
+GLOBAL(void *)
+jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones.
+ * NB: although we include FAR keywords in the routine declarations,
+ * this file won't actually work in 80x86 small/medium model; at least,
+ * you probably won't be able to process useful-size images in only 64KB.
+ */
+
+GLOBAL(void FAR *)
+jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void FAR *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ * It's impossible to do this in a portable way; our current solution is
+ * to make the user tell us (with a default value set at compile time).
+ * If you can actually get the available space, it's a good idea to subtract
+ * a slop factor of 5% or so.
+ */
+
+#ifndef DEFAULT_MAX_MEM		/* so can override from makefile */
+#define DEFAULT_MAX_MEM		1000000L /* default: one megabyte */
+#endif
+
+GLOBAL(long)
+jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
+		    long max_bytes_needed, long already_allocated)
+{
+  return cinfo->mem->max_memory_to_use - already_allocated;
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Backing store objects are only used when the value returned by
+ * jpeg_mem_available is less than the total space needed.  You can dispense
+ * with these routines if you have plenty of virtual memory; see jmemnobs.c.
+ */
+
+
+METHODDEF(void)
+read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+		    void FAR * buffer_address,
+		    long file_offset, long byte_count)
+{
+  if (fseek(info->temp_file, file_offset, SEEK_SET))
+    ERREXIT(cinfo, JERR_TFILE_SEEK);
+  if (JFREAD(info->temp_file, buffer_address, byte_count)
+      != (size_t) byte_count)
+    ERREXIT(cinfo, JERR_TFILE_READ);
+}
+
+
+METHODDEF(void)
+write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+		     void FAR * buffer_address,
+		     long file_offset, long byte_count)
+{
+  if (fseek(info->temp_file, file_offset, SEEK_SET))
+    ERREXIT(cinfo, JERR_TFILE_SEEK);
+  if (JFWRITE(info->temp_file, buffer_address, byte_count)
+      != (size_t) byte_count)
+    ERREXIT(cinfo, JERR_TFILE_WRITE);
+}
+
+
+METHODDEF(void)
+close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
+{
+  fclose(info->temp_file);
+  /* Since this implementation uses tmpfile() to create the file,
+   * no explicit file deletion is needed.
+   */
+}
+
+
+/*
+ * Initial opening of a backing-store object.
+ *
+ * This version uses tmpfile(), which constructs a suitable file name
+ * behind the scenes.  We don't have to use info->temp_name[] at all;
+ * indeed, we can't even find out the actual name of the temp file.
+ */
+
+GLOBAL(void)
+jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+			 long total_bytes_needed)
+{
+  if ((info->temp_file = tmpfile()) == NULL)
+    ERREXITS(cinfo, JERR_TFILE_CREATE, "");
+  info->read_backing_store = read_backing_store;
+  info->write_backing_store = write_backing_store;
+  info->close_backing_store = close_backing_store;
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.
+ */
+
+GLOBAL(long)
+jpeg_mem_init (j_common_ptr cinfo)
+{
+  return DEFAULT_MAX_MEM;	/* default for max_memory_to_use */
+}
+
+GLOBAL(void)
+jpeg_mem_term (j_common_ptr cinfo)
+{
+  /* no work */
+}
diff --git a/cmake/externals/libjpeg/jmemmgr.c b/cmake/externals/libjpeg/jmemmgr.c
new file mode 100644
index 000000000..40377de47
--- /dev/null
+++ b/cmake/externals/libjpeg/jmemmgr.c
@@ -0,0 +1,1115 @@
+/*
+ * jmemmgr.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2011-2019 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the JPEG system-independent memory management
+ * routines.  This code is usable across a wide variety of machines; most
+ * of the system dependencies have been isolated in a separate file.
+ * The major functions provided here are:
+ *   * pool-based allocation and freeing of memory;
+ *   * policy decisions about how to divide available memory among the
+ *     virtual arrays;
+ *   * control logic for swapping virtual arrays between main memory and
+ *     backing storage.
+ * The separate system-dependent file provides the actual backing-storage
+ * access code, and it contains the policy decision about how much total
+ * main memory to use.
+ * This file is system-dependent in the sense that some of its functions
+ * are unnecessary in some systems.  For example, if there is enough virtual
+ * memory so that backing storage will never be used, much of the virtual
+ * array control logic could be removed.  (Of course, if you have that much
+ * memory then you shouldn't care about a little bit of unused code...)
+ */
+
+#define JPEG_INTERNALS
+#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef NO_GETENV
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
+extern char * getenv JPP((const char * name));
+#endif
+#endif
+
+
+/*
+ * Some important notes:
+ *   The allocation routines provided here must never return NULL.
+ *   They should exit to error_exit if unsuccessful.
+ *
+ *   It's not a good idea to try to merge the sarray and barray routines,
+ *   even though they are textually almost the same, because samples are
+ *   usually stored as bytes while coefficients are shorts or ints.  Thus,
+ *   in machines where byte pointers have a different representation from
+ *   word pointers, the resulting machine code could not be the same.
+ */
+
+
+/*
+ * Many machines require storage alignment: longs must start on 4-byte
+ * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
+ * always returns pointers that are multiples of the worst-case alignment
+ * requirement, and we had better do so too.
+ * There isn't any really portable way to determine the worst-case alignment
+ * requirement.  This module assumes that the alignment requirement is
+ * multiples of sizeof(ALIGN_TYPE).
+ * By default, we define ALIGN_TYPE as double.  This is necessary on some
+ * workstations (where doubles really do need 8-byte alignment) and will work
+ * fine on nearly everything.  If your machine has lesser alignment needs,
+ * you can save a few bytes by making ALIGN_TYPE smaller.
+ * The only place I know of where this will NOT work is certain Macintosh
+ * 680x0 compilers that define double as a 10-byte IEEE extended float.
+ * Doing 10-byte alignment is counterproductive because longwords won't be
+ * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
+ * such a compiler.
+ */
+
+#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
+#define ALIGN_TYPE  double
+#endif
+
+
+/*
+ * We allocate objects from "pools", where each pool is gotten with a single
+ * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
+ * overhead within a pool, except for alignment padding.  Each pool has a
+ * header with a link to the next pool of the same class.
+ * Small and large pool headers are identical except that the latter's
+ * link pointer must be FAR on 80x86 machines.
+ * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
+ * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
+ * of the alignment requirement of ALIGN_TYPE.
+ */
+
+typedef union small_pool_struct * small_pool_ptr;
+
+typedef union small_pool_struct {
+  struct {
+    small_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} small_pool_hdr;
+
+typedef union large_pool_struct FAR * large_pool_ptr;
+
+typedef union large_pool_struct {
+  struct {
+    large_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} large_pool_hdr;
+
+
+/*
+ * Here is the full definition of a memory manager object.
+ */
+
+typedef struct {
+  struct jpeg_memory_mgr pub;	/* public fields */
+
+  /* Each pool identifier (lifetime class) names a linked list of pools. */
+  small_pool_ptr small_list[JPOOL_NUMPOOLS];
+  large_pool_ptr large_list[JPOOL_NUMPOOLS];
+
+  /* Since we only have one lifetime class of virtual arrays, only one
+   * linked list is necessary (for each datatype).  Note that the virtual
+   * array control blocks being linked together are actually stored somewhere
+   * in the small-pool list.
+   */
+  jvirt_sarray_ptr virt_sarray_list;
+  jvirt_barray_ptr virt_barray_list;
+
+  /* This counts total space obtained from jpeg_get_small/large */
+  size_t total_space_allocated;
+
+  /* alloc_sarray and alloc_barray set this value for use by virtual
+   * array routines.
+   */
+  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+} my_memory_mgr;
+
+typedef my_memory_mgr * my_mem_ptr;
+
+
+/*
+ * The control blocks for virtual arrays.
+ * Note that these blocks are allocated in the "small" pool area.
+ * System-dependent info for the associated backing store (if any) is hidden
+ * inside the backing_store_info struct.
+ */
+
+struct jvirt_sarray_control {
+  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+struct jvirt_barray_control {
+  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_barray_ptr next;	/* link to next virtual barray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+
+#ifdef MEM_STATS		/* optional extra stuff for statistics */
+
+LOCAL(void)
+print_mem_stats (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+
+  /* Since this is only a debugging stub, we can cheat a little by using
+   * fprintf directly rather than going through the trace message code.
+   * This is helpful because message parm array can't handle longs.
+   */
+  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
+	  pool_id, (long) mem->total_space_allocated);
+
+  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
+       lhdr_ptr = lhdr_ptr->hdr.next) {
+    fprintf(stderr, "  Large chunk used %ld\n",
+	    (long) lhdr_ptr->hdr.bytes_used);
+  }
+
+  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
+       shdr_ptr = shdr_ptr->hdr.next) {
+    fprintf(stderr, "  Small chunk used %ld free %ld\n",
+	    (long) shdr_ptr->hdr.bytes_used,
+	    (long) shdr_ptr->hdr.bytes_left);
+  }
+}
+
+#endif /* MEM_STATS */
+
+
+LOCAL(noreturn_t)
+out_of_memory (j_common_ptr cinfo, int which)
+/* Report an out-of-memory error and stop execution */
+/* If we compiled MEM_STATS support, report alloc requests before dying */
+{
+#ifdef MEM_STATS
+  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+#endif
+  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
+}
+
+
+/*
+ * Allocation of "small" objects.
+ *
+ * For these, we use pooled storage.  When a new pool must be created,
+ * we try to get enough space for the current request plus a "slop" factor,
+ * where the slop will be the amount of leftover space in the new pool.
+ * The speed vs. space tradeoff is largely determined by the slop values.
+ * A different slop value is provided for each pool class (lifetime),
+ * and we also distinguish the first pool of a class from later ones.
+ * NOTE: the values given work fairly well on both 16- and 32-bit-int
+ * machines, but may be too small if longs are 64 bits or more.
+ */
+
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	1600,			/* first PERMANENT pool */
+	16000			/* first IMAGE pool */
+};
+
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	0,			/* additional PERMANENT pools */
+	5000			/* additional IMAGE pools */
+};
+
+#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
+
+
+METHODDEF(void *)
+alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "small" object */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr hdr_ptr, prev_hdr_ptr;
+  size_t odd_bytes, min_request, slop;
+  char * data_ptr;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) MAX_ALLOC_CHUNK - SIZEOF(small_pool_hdr))
+    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
+
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* See if space is available in any existing pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+  prev_hdr_ptr = NULL;
+  hdr_ptr = mem->small_list[pool_id];
+  while (hdr_ptr != NULL) {
+    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
+      break;			/* found pool with enough space */
+    prev_hdr_ptr = hdr_ptr;
+    hdr_ptr = hdr_ptr->hdr.next;
+  }
+
+  /* Time to make a new pool? */
+  if (hdr_ptr == NULL) {
+    /* min_request is what we need now, slop is what will be leftover */
+    min_request = sizeofobject + SIZEOF(small_pool_hdr);
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      slop = first_pool_slop[pool_id];
+    else
+      slop = extra_pool_slop[pool_id];
+    /* Don't ask for more than MAX_ALLOC_CHUNK */
+    if (slop > (size_t) MAX_ALLOC_CHUNK - min_request)
+      slop = (size_t) MAX_ALLOC_CHUNK - min_request;
+    /* Try to get space, if fail reduce slop and try again */
+    for (;;) {
+      hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
+      if (hdr_ptr != NULL)
+	break;
+      slop /= 2;
+      if (slop < MIN_SLOP)	/* give up when it gets real small */
+	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+    }
+    mem->total_space_allocated += min_request + slop;
+    /* Success, initialize the new pool header and add to end of list */
+    hdr_ptr->hdr.next = NULL;
+    hdr_ptr->hdr.bytes_used = 0;
+    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      mem->small_list[pool_id] = hdr_ptr;
+    else
+      prev_hdr_ptr->hdr.next = hdr_ptr;
+  }
+
+  /* OK, allocate the object from the current pool */
+  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
+  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
+  hdr_ptr->hdr.bytes_used += sizeofobject;
+  hdr_ptr->hdr.bytes_left -= sizeofobject;
+
+  return (void *) data_ptr;
+}
+
+
+/*
+ * Allocation of "large" objects.
+ *
+ * The external semantics of these are the same as "small" objects,
+ * except that FAR pointers are used on 80x86.  However the pool
+ * management heuristics are quite different.  We assume that each
+ * request is large enough that it may as well be passed directly to
+ * jpeg_get_large; the pool management just links everything together
+ * so that we can free it all on demand.
+ * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
+ * structures.  The routines that create these structures (see below)
+ * deliberately bunch rows together to ensure a large request size.
+ */
+
+METHODDEF(void FAR *)
+alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "large" object */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr hdr_ptr;
+  size_t odd_bytes;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr))
+    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
+
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* Always make a new pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
+					    SIZEOF(large_pool_hdr));
+  if (hdr_ptr == NULL)
+    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
+  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
+
+  /* Success, initialize the new pool header and add to list */
+  hdr_ptr->hdr.next = mem->large_list[pool_id];
+  /* We maintain space counts in each pool header for statistical purposes,
+   * even though they are not needed for allocation.
+   */
+  hdr_ptr->hdr.bytes_used = sizeofobject;
+  hdr_ptr->hdr.bytes_left = 0;
+  mem->large_list[pool_id] = hdr_ptr;
+
+  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
+}
+
+
+/*
+ * Creation of 2-D sample arrays.
+ * The pointers are in near heap, the samples themselves in FAR heap.
+ *
+ * To minimize allocation overhead and to allow I/O of large contiguous
+ * blocks, we allocate the sample rows in groups of as many rows as possible
+ * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
+ * NB: the virtual array control routines, later in this file, know about
+ * this chunking of rows.  The rowsperchunk value is left in the mem manager
+ * object so that it can be saved away if this sarray is the workspace for
+ * a virtual array.
+ */
+
+METHODDEF(JSAMPARRAY)
+alloc_sarray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION samplesperrow, JDIMENSION numrows)
+/* Allocate a 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JSAMPARRAY result;
+  JSAMPROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr)) /
+	  ((long) samplesperrow * SIZEOF(JSAMPLE));
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* Get space for row pointers (small object) */
+  result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
+				    (size_t) numrows * SIZEOF(JSAMPROW));
+
+  /* Get the rows themselves (large objects) */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow);
+    workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
+      (size_t) rowsperchunk * (size_t) samplesperrow * SIZEOF(JSAMPLE));
+    for (i = rowsperchunk; i > 0; i--) {
+      result[currow++] = workspace;
+      workspace += samplesperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * Creation of 2-D coefficient-block arrays.
+ * This is essentially the same as the code for sample arrays, above.
+ */
+
+METHODDEF(JBLOCKARRAY)
+alloc_barray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION blocksperrow, JDIMENSION numrows)
+/* Allocate a 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JBLOCKARRAY result;
+  JBLOCKROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr)) /
+	  ((long) blocksperrow * SIZEOF(JBLOCK));
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* Get space for row pointers (small object) */
+  result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
+				     (size_t) numrows * SIZEOF(JBLOCKROW));
+
+  /* Get the rows themselves (large objects) */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow);
+    workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
+      (size_t) rowsperchunk * (size_t) blocksperrow * SIZEOF(JBLOCK));
+    for (i = rowsperchunk; i > 0; i--) {
+      result[currow++] = workspace;
+      workspace += blocksperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * About virtual array management:
+ *
+ * The above "normal" array routines are only used to allocate strip buffers
+ * (as wide as the image, but just a few rows high).  Full-image-sized buffers
+ * are handled as "virtual" arrays.  The array is still accessed a strip at a
+ * time, but the memory manager must save the whole array for repeated
+ * accesses.  The intended implementation is that there is a strip buffer in
+ * memory (as high as is possible given the desired memory limit), plus a
+ * backing file that holds the rest of the array.
+ *
+ * The request_virt_array routines are told the total size of the image and
+ * the maximum number of rows that will be accessed at once.  The in-memory
+ * buffer must be at least as large as the maxaccess value.
+ *
+ * The request routines create control blocks but not the in-memory buffers.
+ * That is postponed until realize_virt_arrays is called.  At that time the
+ * total amount of space needed is known (approximately, anyway), so free
+ * memory can be divided up fairly.
+ *
+ * The access_virt_array routines are responsible for making a specific strip
+ * area accessible (after reading or writing the backing file, if necessary).
+ * Note that the access routines are told whether the caller intends to modify
+ * the accessed strip; during a read-only pass this saves having to rewrite
+ * data to disk.  The access routines are also responsible for pre-zeroing
+ * any newly accessed rows, if pre-zeroing was requested.
+ *
+ * In current usage, the access requests are usually for nonoverlapping
+ * strips; that is, successive access start_row numbers differ by exactly
+ * num_rows = maxaccess.  This means we can get good performance with simple
+ * buffer dump/reload logic, by making the in-memory buffer be a multiple
+ * of the access height; then there will never be accesses across bufferload
+ * boundaries.  The code will still work with overlapping access requests,
+ * but it doesn't handle bufferload overlaps very efficiently.
+ */
+
+
+METHODDEF(jvirt_sarray_ptr)
+request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION samplesperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Request a virtual 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_sarray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* get control block */
+  result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
+					  SIZEOF(struct jvirt_sarray_control));
+
+  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->samplesperrow = samplesperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
+  mem->virt_sarray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(jvirt_barray_ptr)
+request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION blocksperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Request a virtual 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  jvirt_barray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* get control block */
+  result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
+					  SIZEOF(struct jvirt_barray_control));
+
+  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->blocksperrow = blocksperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->next = mem->virt_barray_list; /* add to list of virtual arrays */
+  mem->virt_barray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(void)
+realize_virt_arrays (j_common_ptr cinfo)
+/* Allocate the in-memory buffers for any unrealized virtual arrays */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  long bytesperrow, space_per_minheight, maximum_space;
+  long avail_mem, minheights, max_minheights;
+  jvirt_sarray_ptr sptr;
+  jvirt_barray_ptr bptr;
+
+  /* Compute the minimum space needed (maxaccess rows in each buffer)
+   * and the maximum space needed (full image height in each buffer).
+   * These may be of use to the system-dependent jpeg_mem_available routine.
+   */
+  space_per_minheight = 0;
+  maximum_space = 0;
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      bytesperrow = (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+      space_per_minheight += (long) sptr->maxaccess * bytesperrow;
+      maximum_space += (long) sptr->rows_in_array * bytesperrow;
+    }
+  }
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      bytesperrow = (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+      space_per_minheight += (long) bptr->maxaccess * bytesperrow;
+      maximum_space += (long) bptr->rows_in_array * bytesperrow;
+    }
+  }
+
+  if (space_per_minheight <= 0)
+    return;			/* no unrealized arrays, no work */
+
+  /* Determine amount of memory to actually use; this is system-dependent. */
+  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
+				 (long) mem->total_space_allocated);
+
+  /* If the maximum space needed is available, make all the buffers full
+   * height; otherwise parcel it out with the same number of minheights
+   * in each buffer.
+   */
+  if (avail_mem >= maximum_space)
+    max_minheights = 1000000000L;
+  else {
+    max_minheights = avail_mem / space_per_minheight;
+    /* If there doesn't seem to be enough space, try to get the minimum
+     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
+     */
+    if (max_minheights <= 0)
+      max_minheights = 1;
+  }
+
+  /* Allocate the in-memory buffers and initialize backing store as needed. */
+
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	sptr->rows_in_mem = sptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+				(long) sptr->rows_in_array *
+				(long) sptr->samplesperrow *
+				(long) SIZEOF(JSAMPLE));
+	sptr->b_s_open = TRUE;
+      }
+      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
+				      sptr->samplesperrow, sptr->rows_in_mem);
+      sptr->rowsperchunk = mem->last_rowsperchunk;
+      sptr->cur_start_row = 0;
+      sptr->first_undef_row = 0;
+      sptr->dirty = FALSE;
+    }
+  }
+
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	bptr->rows_in_mem = bptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+				(long) bptr->rows_in_array *
+				(long) bptr->blocksperrow *
+				(long) SIZEOF(JBLOCK));
+	bptr->b_s_open = TRUE;
+      }
+      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
+				      bptr->blocksperrow, bptr->rows_in_mem);
+      bptr->rowsperchunk = mem->last_rowsperchunk;
+      bptr->cur_start_row = 0;
+      bptr->first_undef_row = 0;
+      bptr->dirty = FALSE;
+    }
+  }
+}
+
+
+LOCAL(void)
+do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual sample array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
+  file_offset = (long) ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i;
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;
+  }
+}
+
+
+LOCAL(void)
+do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual coefficient-block array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
+  file_offset = (long) ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i;
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;
+  }
+}
+
+
+METHODDEF(JSAMPARRAY)
+access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Access the part of a virtual sample array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
+    if (! ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_sarray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_sarray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+METHODDEF(JBLOCKARRAY)
+access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Access the part of a virtual block array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
+    if (! ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_barray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_barray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+/*
+ * Release all objects belonging to a specified pool.
+ */
+
+METHODDEF(void)
+free_pool (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+  size_t space_freed;
+
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+#ifdef MEM_STATS
+  if (cinfo->err->trace_level > 1)
+    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
+#endif
+
+  /* If freeing IMAGE pool, close any virtual arrays first */
+  if (pool_id == JPOOL_IMAGE) {
+    jvirt_sarray_ptr sptr;
+    jvirt_barray_ptr bptr;
+
+    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+      if (sptr->b_s_open) {	/* there may be no backing store */
+	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
+      }
+    }
+    mem->virt_sarray_list = NULL;
+    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+      if (bptr->b_s_open) {	/* there may be no backing store */
+	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
+      }
+    }
+    mem->virt_barray_list = NULL;
+  }
+
+  /* Release large objects */
+  lhdr_ptr = mem->large_list[pool_id];
+  mem->large_list[pool_id] = NULL;
+
+  while (lhdr_ptr != NULL) {
+    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
+    space_freed = lhdr_ptr->hdr.bytes_used +
+		  lhdr_ptr->hdr.bytes_left +
+		  SIZEOF(large_pool_hdr);
+    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    lhdr_ptr = next_lhdr_ptr;
+  }
+
+  /* Release small objects */
+  shdr_ptr = mem->small_list[pool_id];
+  mem->small_list[pool_id] = NULL;
+
+  while (shdr_ptr != NULL) {
+    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
+    space_freed = shdr_ptr->hdr.bytes_used +
+		  shdr_ptr->hdr.bytes_left +
+		  SIZEOF(small_pool_hdr);
+    jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    shdr_ptr = next_shdr_ptr;
+  }
+}
+
+
+/*
+ * Close up shop entirely.
+ * Note that this cannot be called unless cinfo->mem is non-NULL.
+ */
+
+METHODDEF(void)
+self_destruct (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Close all backing store, release all memory.
+   * Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    free_pool(cinfo, pool);
+  }
+
+  /* Release the memory manager control block too. */
+  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
+  cinfo->mem = NULL;		/* ensures I will be called only once */
+
+  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
+}
+
+
+/*
+ * Memory manager initialization.
+ * When this is called, only the error manager pointer is valid in cinfo!
+ */
+
+GLOBAL(void)
+jinit_memory_mgr (j_common_ptr cinfo)
+{
+  my_mem_ptr mem;
+  long max_to_use;
+  int pool;
+  size_t test_mac;
+
+  cinfo->mem = NULL;		/* for safety if init fails */
+
+  /* Check for configuration errors.
+   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * doesn't reflect any real hardware alignment requirement.
+   * The test is a little tricky: for X>0, X and X-1 have no one-bits
+   * in common if and only if X is a power of 2, ie has only one one-bit.
+   * Some compilers may give an "unreachable code" warning here; ignore it.
+   */
+  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
+  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
+   * a multiple of SIZEOF(ALIGN_TYPE).
+   * Again, an "unreachable code" warning may be ignored here.
+   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
+   */
+  test_mac = (size_t) MAX_ALLOC_CHUNK;
+  if ((long) test_mac != MAX_ALLOC_CHUNK ||
+      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
+
+  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
+
+  /* Attempt to allocate memory manager's control block */
+  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
+
+  if (mem == NULL) {
+    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  }
+
+  /* OK, fill in the method pointers */
+  mem->pub.alloc_small = alloc_small;
+  mem->pub.alloc_large = alloc_large;
+  mem->pub.alloc_sarray = alloc_sarray;
+  mem->pub.alloc_barray = alloc_barray;
+  mem->pub.request_virt_sarray = request_virt_sarray;
+  mem->pub.request_virt_barray = request_virt_barray;
+  mem->pub.realize_virt_arrays = realize_virt_arrays;
+  mem->pub.access_virt_sarray = access_virt_sarray;
+  mem->pub.access_virt_barray = access_virt_barray;
+  mem->pub.free_pool = free_pool;
+  mem->pub.self_destruct = self_destruct;
+
+  /* Make MAX_ALLOC_CHUNK accessible to other modules */
+  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
+
+  /* Initialize working state */
+  mem->pub.max_memory_to_use = max_to_use;
+
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    mem->small_list[pool] = NULL;
+    mem->large_list[pool] = NULL;
+  }
+  mem->virt_sarray_list = NULL;
+  mem->virt_barray_list = NULL;
+
+  mem->total_space_allocated = SIZEOF(my_memory_mgr);
+
+  /* Declare ourselves open for business */
+  cinfo->mem = &mem->pub;
+
+  /* Check for an environment variable JPEGMEM; if found, override the
+   * default max_memory setting from jpeg_mem_init.  Note that the
+   * surrounding application may again override this value.
+   * If your system doesn't support getenv(), define NO_GETENV to disable
+   * this feature.
+   */
+#ifndef NO_GETENV
+  { char * memenv;
+
+    if ((memenv = getenv("JPEGMEM")) != NULL) {
+      char ch = 'x';
+
+      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+	if (ch == 'm' || ch == 'M')
+	  max_to_use *= 1000L;
+	mem->pub.max_memory_to_use = max_to_use * 1000L;
+      }
+    }
+  }
+#endif
+
+}
diff --git a/cmake/externals/libjpeg/jmemnobs.c b/cmake/externals/libjpeg/jmemnobs.c
new file mode 100644
index 000000000..a364cd822
--- /dev/null
+++ b/cmake/externals/libjpeg/jmemnobs.c
@@ -0,0 +1,113 @@
+/*
+ * jmemnobs.c
+ *
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * Modified 2019 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides a really simple implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that no backing-store files are needed: all required space
+ * can be obtained from malloc().
+ * This is very portable in the sense that it'll compile on almost anything,
+ * but you'd better have lots of main memory (or virtual memory) if you want
+ * to process big images.
+ * Note that the max_memory_to_use option is respected by this implementation.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().
+ */
+
+GLOBAL(void *)
+jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones.
+ * NB: although we include FAR keywords in the routine declarations,
+ * this file won't actually work in 80x86 small/medium model; at least,
+ * you probably won't be able to process useful-size images in only 64KB.
+ */
+
+GLOBAL(void FAR *)
+jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void FAR *) malloc(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ */
+
+GLOBAL(long)
+jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
+		    long max_bytes_needed, long already_allocated)
+{
+  if (cinfo->mem->max_memory_to_use)
+    return cinfo->mem->max_memory_to_use - already_allocated;
+
+  /* Here we say, "we got all you want bud!" */
+  return max_bytes_needed;
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Since jpeg_mem_available always promised the moon,
+ * this should never be called and we can just error out.
+ */
+
+GLOBAL(void)
+jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+			 long total_bytes_needed)
+{
+  ERREXIT(cinfo, JERR_NO_BACKING_STORE);
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  Here, there isn't any.
+ */
+
+GLOBAL(long)
+jpeg_mem_init (j_common_ptr cinfo)
+{
+  return 0;			/* just set max_memory_to_use to 0 */
+}
+
+GLOBAL(void)
+jpeg_mem_term (j_common_ptr cinfo)
+{
+  /* no work */
+}
diff --git a/cmake/externals/libjpeg/jmemsys.h b/cmake/externals/libjpeg/jmemsys.h
new file mode 100644
index 000000000..6c3c6d348
--- /dev/null
+++ b/cmake/externals/libjpeg/jmemsys.h
@@ -0,0 +1,198 @@
+/*
+ * jmemsys.h
+ *
+ * Copyright (C) 1992-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file defines the interface between the system-independent
+ * and system-dependent portions of the JPEG memory manager.  No other
+ * modules need include it.  (The system-independent portion is jmemmgr.c;
+ * there are several different versions of the system-dependent portion.)
+ *
+ * This file works as-is for the system-dependent memory managers supplied
+ * in the IJG distribution.  You may need to modify it if you write a
+ * custom memory manager.  If system-dependent changes are needed in
+ * this file, the best method is to #ifdef them based on a configuration
+ * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
+ * and USE_MAC_MEMMGR.
+ */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_get_small		jGetSmall
+#define jpeg_free_small		jFreeSmall
+#define jpeg_get_large		jGetLarge
+#define jpeg_free_large		jFreeLarge
+#define jpeg_mem_available	jMemAvail
+#define jpeg_open_backing_store	jOpenBackStore
+#define jpeg_mem_init		jMemInit
+#define jpeg_mem_term		jMemTerm
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * These two functions are used to allocate and release small chunks of
+ * memory.  (Typically the total amount requested through jpeg_get_small is
+ * no more than 20K or so; this will be requested in chunks of a few K each.)
+ * Behavior should be the same as for the standard library functions malloc
+ * and free; in particular, jpeg_get_small must return NULL on failure.
+ * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
+ * size of the object being freed, just in case it's needed.
+ * On an 80x86 machine using small-data memory model, these manage near heap.
+ */
+
+EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
+EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
+				  size_t sizeofobject));
+
+/*
+ * These two functions are used to allocate and release large chunks of
+ * memory (up to the total free space designated by jpeg_mem_available).
+ * The interface is the same as above, except that on an 80x86 machine,
+ * far pointers are used.  On most other machines these are identical to
+ * the jpeg_get/free_small routines; but we keep them separate anyway,
+ * in case a different allocation strategy is desirable for large chunks.
+ */
+
+EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
+				       size_t sizeofobject));
+EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
+				  size_t sizeofobject));
+
+/*
+ * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
+ * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
+ * matter, but that case should never come into play).  This macro is needed
+ * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
+ * On those machines, we expect that jconfig.h will provide a proper value.
+ * On machines with 32-bit flat address spaces, any large constant may be used.
+ *
+ * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
+ * size_t and will be a multiple of sizeof(align_type).
+ */
+
+#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#define MAX_ALLOC_CHUNK  1000000000L
+#endif
+
+/*
+ * This routine computes the total space still available for allocation by
+ * jpeg_get_large.  If more space than this is needed, backing store will be
+ * used.  NOTE: any memory already allocated must not be counted.
+ *
+ * There is a minimum space requirement, corresponding to the minimum
+ * feasible buffer sizes; jmemmgr.c will request that much space even if
+ * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
+ * all working storage in memory, is also passed in case it is useful.
+ * Finally, the total space already allocated is passed.  If no better
+ * method is available, cinfo->mem->max_memory_to_use - already_allocated
+ * is often a suitable calculation.
+ *
+ * It is OK for jpeg_mem_available to underestimate the space available
+ * (that'll just lead to more backing-store access than is really necessary).
+ * However, an overestimate will lead to failure.  Hence it's wise to subtract
+ * a slop factor from the true available space.  5% should be enough.
+ *
+ * On machines with lots of virtual memory, any large constant may be returned.
+ * Conversely, zero may be returned to always use the minimum amount of memory.
+ */
+
+EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
+				     long min_bytes_needed,
+				     long max_bytes_needed,
+				     long already_allocated));
+
+
+/*
+ * This structure holds whatever state is needed to access a single
+ * backing-store object.  The read/write/close method pointers are called
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
+ * are private to the system-dependent backing store routines.
+ */
+
+#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+
+
+#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+
+typedef unsigned short XMSH;	/* type of extended-memory handles */
+typedef unsigned short EMSH;	/* type of expanded-memory handles */
+
+typedef union {
+  short file_handle;		/* DOS file handle if it's a temp file */
+  XMSH xms_handle;		/* handle if it's a chunk of XMS */
+  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+} handle_union;
+
+#endif /* USE_MSDOS_MEMMGR */
+
+#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#include <Files.h>
+#endif /* USE_MAC_MEMMGR */
+
+
+typedef struct backing_store_struct * backing_store_ptr;
+
+typedef struct backing_store_struct {
+  /* Methods for reading/writing/closing this backing-store object */
+  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
+				     backing_store_ptr info,
+				     void FAR * buffer_address,
+				     long file_offset, long byte_count));
+  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info,
+				      void FAR * buffer_address,
+				      long file_offset, long byte_count));
+  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info));
+
+  /* Private fields for system-dependent backing-store management */
+#ifdef USE_MSDOS_MEMMGR
+  /* For the MS-DOS manager (jmemdos.c), we need: */
+  handle_union handle;		/* reference to backing-store storage object */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+#ifdef USE_MAC_MEMMGR
+  /* For the Mac manager (jmemmac.c), we need: */
+  short temp_file;		/* file reference number to temp file */
+  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else
+  /* For a typical implementation with temp files, we need: */
+  FILE * temp_file;		/* stdio reference to temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
+#endif
+#endif
+} backing_store_info;
+
+
+/*
+ * Initial opening of a backing-store object.  This must fill in the
+ * read/write/close pointers in the object.  The read/write routines
+ * may take an error exit if the specified maximum file size is exceeded.
+ * (If jpeg_mem_available always returns a large value, this routine can
+ * just take an error exit.)
+ */
+
+EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
+					  backing_store_ptr info,
+					  long total_bytes_needed));
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  jpeg_mem_init will be called before anything is
+ * allocated (and, therefore, nothing in cinfo is of use except the error
+ * manager pointer).  It should return a suitable default value for
+ * max_memory_to_use; this may subsequently be overridden by the surrounding
+ * application.  (Note that max_memory_to_use is only important if
+ * jpeg_mem_available chooses to consult it ... no one else will.)
+ * jpeg_mem_term may assume that all requested memory has been freed and that
+ * all opened backing-store objects have been closed.
+ */
+
+EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/cmake/externals/libjpeg/jmorecfg.h b/cmake/externals/libjpeg/jmorecfg.h
new file mode 100644
index 000000000..4638d6af2
--- /dev/null
+++ b/cmake/externals/libjpeg/jmorecfg.h
@@ -0,0 +1,457 @@
+/*
+ * jmorecfg.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains additional configuration options that customize the
+ * JPEG software for special applications or support machine-dependent
+ * optimizations.  Most users will not need to touch this file.
+ */
+
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   9   for 9-bit sample values
+ *   10  for 10-bit sample values
+ *   11  for 11-bit sample values
+ *   12  for 12-bit sample values
+ * Only 8, 9, 10, 11, and 12 bits sample data precision are supported for
+ * full-feature DCT processing.  Further depths up to 16-bit may be added
+ * later for the lossless modes of operation.
+ * Run-time selection and conversion of data precision will be added later
+ * and are currently not supported, sorry.
+ * Exception:  The transcoding part (jpegtran) supports all settings in a
+ * single instance, since it operates on the level of DCT coefficients and
+ * not sample values.  The DCT coefficients are of the same type (16 bits)
+ * in all cases (see below).
+ */
+
+#define BITS_IN_JSAMPLE  8	/* use 8, 9, 10, 11, or 12 */
+
+
+/*
+ * Maximum number of components (color channels) allowed in JPEG image.
+ * To meet the letter of the JPEG spec, set this to 255.  However, darn
+ * few applications need more than 4 channels (maybe 5 for CMYK + alpha
+ * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
+ * really short on memory.  (Each allowed component costs a hundred or so
+ * bytes of storage, whether actually used in an image or not.)
+ */
+
+#define MAX_COMPONENTS  10	/* maximum number of image components */
+
+
+/*
+ * Basic data types.
+ * You may need to change these if you have a machine with unusual data
+ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
+ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
+ * but it had better be at least 16.
+ */
+
+/* Representation of a single sample (pixel element value).
+ * We frequently allocate large arrays of these, so it's important to keep
+ * them small.  But if you have memory to burn and access to char or short
+ * arrays is very slow on your hardware, you might want to change these.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+/* JSAMPLE should be the smallest type that will hold the values 0..255.
+ * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JSAMPLE;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJSAMPLE(value)  ((int) (value))
+#else
+#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+#define MAXJSAMPLE	255
+#define CENTERJSAMPLE	128
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE == 9
+/* JSAMPLE should be the smallest type that will hold the values 0..511.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	511
+#define CENTERJSAMPLE	256
+
+#endif /* BITS_IN_JSAMPLE == 9 */
+
+
+#if BITS_IN_JSAMPLE == 10
+/* JSAMPLE should be the smallest type that will hold the values 0..1023.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	1023
+#define CENTERJSAMPLE	512
+
+#endif /* BITS_IN_JSAMPLE == 10 */
+
+
+#if BITS_IN_JSAMPLE == 11
+/* JSAMPLE should be the smallest type that will hold the values 0..2047.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	2047
+#define CENTERJSAMPLE	1024
+
+#endif /* BITS_IN_JSAMPLE == 11 */
+
+
+#if BITS_IN_JSAMPLE == 12
+/* JSAMPLE should be the smallest type that will hold the values 0..4095.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	4095
+#define CENTERJSAMPLE	2048
+
+#endif /* BITS_IN_JSAMPLE == 12 */
+
+
+/* Representation of a DCT frequency coefficient.
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
+ * Again, we allocate large arrays of these, but you can change to int
+ * if you have memory to burn and "short" is really slow.
+ */
+
+typedef short JCOEF;
+
+
+/* Compressed datastreams are represented as arrays of JOCTET.
+ * These must be EXACTLY 8 bits wide, at least once they are written to
+ * external storage.  Note that when using the stdio data source/destination
+ * managers, this is also the data type passed to fread/fwrite.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JOCTET;
+#define GETJOCTET(value)  (value)
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JOCTET;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJOCTET(value)  (value)
+#else
+#define GETJOCTET(value)  ((value) & 0xFF)
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+
+/* These typedefs are used for various table entries and so forth.
+ * They must be at least as wide as specified; but making them too big
+ * won't cost a huge amount of memory, so we don't provide special
+ * extraction code like we did for JSAMPLE.  (In other words, these
+ * typedefs live at a different point on the speed/space tradeoff curve.)
+ */
+
+/* UINT8 must hold at least the values 0..255. */
+
+#ifdef HAVE_UNSIGNED_CHAR
+typedef unsigned char UINT8;
+#else /* not HAVE_UNSIGNED_CHAR */
+#ifdef CHAR_IS_UNSIGNED
+typedef char UINT8;
+#else /* not CHAR_IS_UNSIGNED */
+typedef short UINT8;
+#endif /* CHAR_IS_UNSIGNED */
+#endif /* HAVE_UNSIGNED_CHAR */
+
+/* UINT16 must hold at least the values 0..65535. */
+
+#ifdef HAVE_UNSIGNED_SHORT
+typedef unsigned short UINT16;
+#else /* not HAVE_UNSIGNED_SHORT */
+typedef unsigned int UINT16;
+#endif /* HAVE_UNSIGNED_SHORT */
+
+/* INT16 must hold at least the values -32768..32767. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+typedef short INT16;
+#endif
+
+/* INT32 must hold at least signed 32-bit values. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+#ifndef _BASETSD_H_		/* Microsoft defines it in basetsd.h */
+#ifndef _BASETSD_H		/* MinGW is slightly different */
+#ifndef QGLOBAL_H		/* Qt defines it in qglobal.h */
+typedef long INT32;
+#endif
+#endif
+#endif
+#endif
+
+/* Datatype used for image dimensions.  The JPEG standard only supports
+ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+ * "unsigned int" is sufficient on all machines.  However, if you need to
+ * handle larger images and you don't mind deviating from the spec, you
+ * can change this datatype.
+ */
+
+typedef unsigned int JDIMENSION;
+
+#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
+
+
+/* These macros are used in all function definitions and extern declarations.
+ * You could modify them if you need to change function linkage conventions;
+ * in particular, you'll need to do that to make the library a Windows DLL.
+ * Another application is to make all functions global for use with debuggers
+ * or code profilers that require it.
+ */
+
+/* a function called through method pointers: */
+#define METHODDEF(type)		static type
+/* a function used only in its module: */
+#define LOCAL(type)		static type
+/* a function referenced thru EXTERNs: */
+#define GLOBAL(type)		type
+/* a reference to a GLOBAL function: */
+#define EXTERN(type)		extern type
+
+
+/* This macro is used to declare a "method", that is, a function pointer.
+ * We want to supply prototype parameters if the compiler can cope.
+ * Note that the arglist parameter must be parenthesized!
+ * Again, you can customize this if you need special linkage keywords.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+#else
+#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+#endif
+
+
+/* The noreturn type identifier is used to declare functions
+ * which cannot return.
+ * Compilers can thus create more optimized code and perform
+ * better checks for warnings and errors.
+ * Static analyzer tools can make improved inferences about
+ * execution paths and are prevented from giving false alerts.
+ *
+ * Unfortunately, the proposed specifications of corresponding
+ * extensions in the Dec 2011 ISO C standard revision (C11),
+ * GCC, MSVC, etc. are not viable.
+ * Thus we introduce a user defined type to declare noreturn
+ * functions at least for clarity.  A proper compiler would
+ * have a suitable noreturn type to match in place of void.
+ */
+
+#ifndef HAVE_NORETURN_T
+typedef void noreturn_t;
+#endif
+
+
+/* Here is the pseudo-keyword for declaring pointers that must be "far"
+ * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
+ * by just saying "FAR *" where such a pointer is needed.  In a few places
+ * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
+ */
+
+#ifndef FAR
+#ifdef NEED_FAR_POINTERS
+#define FAR  far
+#else
+#define FAR
+#endif
+#endif
+
+
+/*
+ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
+ * in standard header files.  Or you may have conflicts with application-
+ * specific header files that you want to include together with these files.
+ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
+ */
+
+#ifndef HAVE_BOOLEAN
+#if defined FALSE || defined TRUE || defined QGLOBAL_H
+/* Qt3 defines FALSE and TRUE as "const" variables in qglobal.h */
+typedef int boolean;
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE	1
+#endif
+#else
+typedef enum { FALSE = 0, TRUE = 1 } boolean;
+#endif
+#endif
+
+
+/*
+ * The remaining options affect code selection within the JPEG library,
+ * but they don't need to be visible to most applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+#define JPEG_INTERNAL_OPTIONS
+#endif
+
+#ifdef JPEG_INTERNAL_OPTIONS
+
+
+/*
+ * These defines indicate whether to include various optional functions.
+ * Undefining some of these symbols will produce a smaller but less capable
+ * library.  Note that you can leave certain source files out of the
+ * compilation/linking process if you've #undef'd the corresponding symbols.
+ * (You may HAVE to do that if your compiler doesn't like null source files.)
+ */
+
+/* Capability options common to encoder and decoder: */
+
+#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+
+/* Encoder capability options: */
+
+#define C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN) */
+#define DCT_SCALING_SUPPORTED	/* Input rescaling via DCT? (Requires DCT_ISLOW) */
+#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+/* Note: if you selected more than 8-bit data precision, it is dangerous to
+ * turn off ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only
+ * good for 8-bit precision, so arithmetic coding is recommended for higher
+ * precision.  The Huffman encoder normally uses entropy optimization to
+ * compute usable tables for higher precision.  Otherwise, you'll have to
+ * supply different default Huffman tables.
+ * The exact same statements apply for progressive JPEG: the default tables
+ * don't work for progressive mode.  (This may get fixed, however.)
+ */
+#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
+
+/* Decoder capability options: */
+
+#define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN) */
+#define IDCT_SCALING_SUPPORTED	/* Output rescaling via IDCT? (Requires DCT_ISLOW) */
+#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
+#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
+#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+
+/* more capability options later, no doubt */
+
+
+/*
+ * Ordering of RGB data in scanlines passed to or from the application.
+ * If your application wants to deal with data in the order B,G,R, just
+ * #define JPEG_USE_RGB_CUSTOM in jconfig.h, or define your own custom
+ * order in jconfig.h and #define JPEG_HAVE_RGB_CUSTOM.
+ * You can also deal with formats such as R,G,B,X (one extra byte per pixel)
+ * by changing RGB_PIXELSIZE.
+ * Note that changing the offsets will also change
+ * the order in which colormap data is organized.
+ * RESTRICTIONS:
+ * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
+ * 2. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
+ *    is not 3 (they don't understand about dummy color components!).
+ *    So you can't use color quantization if you change that value.
+ */
+
+#ifndef JPEG_HAVE_RGB_CUSTOM
+#ifdef JPEG_USE_RGB_CUSTOM
+#define RGB_RED		2	/* Offset of Red in an RGB scanline element */
+#define RGB_GREEN	1	/* Offset of Green */
+#define RGB_BLUE	0	/* Offset of Blue */
+#else
+#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
+#define RGB_GREEN	1	/* Offset of Green */
+#define RGB_BLUE	2	/* Offset of Blue */
+#endif
+#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
+#endif
+
+
+/* Definitions for speed-related optimizations. */
+
+
+/* If your compiler supports inline functions, define INLINE
+ * as the inline keyword; otherwise define it as empty.
+ */
+
+#ifndef INLINE
+#ifdef __GNUC__			/* for instance, GNU C knows about inline */
+#define INLINE __inline__
+#endif
+#ifndef INLINE
+#define INLINE			/* default is to define it as empty */
+#endif
+#endif
+
+
+/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
+ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
+ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
+ */
+
+#ifndef MULTIPLIER
+#define MULTIPLIER  int		/* type for fastest integer multiply */
+#endif
+
+
+/* FAST_FLOAT should be either float or double, whichever is done faster
+ * by your compiler.  (Note that this type is only used in the floating point
+ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
+ * Typically, float is faster in ANSI C compilers, while double is faster in
+ * pre-ANSI compilers (because they insist on converting to double anyway).
+ * The code below therefore chooses float if we have ANSI-style prototypes.
+ */
+
+#ifndef FAST_FLOAT
+#ifdef HAVE_PROTOTYPES
+#define FAST_FLOAT  float
+#else
+#define FAST_FLOAT  double
+#endif
+#endif
+
+#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/cmake/externals/libjpeg/jpegint.h b/cmake/externals/libjpeg/jpegint.h
new file mode 100644
index 000000000..3528bff5b
--- /dev/null
+++ b/cmake/externals/libjpeg/jpegint.h
@@ -0,0 +1,445 @@
+/*
+ * jpegint.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides common declarations for the various JPEG modules.
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+
+/* Declarations for both compression & decompression */
+
+typedef enum {			/* Operating modes for buffer controllers */
+	JBUF_PASS_THRU,		/* Plain stripwise operation */
+	/* Remaining modes require a full-image buffer to have been created */
+	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
+	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
+	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+} J_BUF_MODE;
+
+/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
+#define CSTATE_START	100	/* after create_compress */
+#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
+#define DSTATE_START	200	/* after create_decompress */
+#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
+#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
+#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
+#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+
+
+/* Declarations for compression modules */
+
+/* Master control module */
+struct jpeg_comp_master {
+  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean call_pass_startup;	/* True if pass_startup must be called */
+  boolean is_last_pass;		/* True during last pass */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_c_main_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_compress_ptr cinfo,
+			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			       JDIMENSION in_rows_avail));
+};
+
+/* Compression preprocessing (downsampling input buffer control) */
+struct jpeg_c_prep_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
+				   JSAMPARRAY input_buf,
+				   JDIMENSION *in_row_ctr,
+				   JDIMENSION in_rows_avail,
+				   JSAMPIMAGE output_buf,
+				   JDIMENSION *out_row_group_ctr,
+				   JDIMENSION out_row_groups_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_c_coef_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
+				   JSAMPIMAGE input_buf));
+};
+
+/* Colorspace conversion */
+struct jpeg_color_converter {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
+				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+				JDIMENSION output_row, int num_rows));
+};
+
+/* Downsampling */
+struct jpeg_downsampler {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, downsample, (j_compress_ptr cinfo,
+			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+			     JSAMPIMAGE output_buf,
+			     JDIMENSION out_row_group_index));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Forward DCT (also controls coefficient quantization) */
+typedef JMETHOD(void, forward_DCT_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		 JDIMENSION start_col, JDIMENSION num_blocks));
+
+struct jpeg_forward_dct {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  /* It is useful to allow each component to have a separate FDCT method. */
+  forward_DCT_ptr forward_DCT[MAX_COMPONENTS];
+};
+
+/* Entropy encoding */
+struct jpeg_entropy_encoder {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
+  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKARRAY MCU_data));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+};
+
+/* Marker writing */
+struct jpeg_marker_writer {
+  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
+  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
+  /* These routines are exported to allow insertion of extra markers */
+  /* Probably only COM and APPn markers should be written this way */
+  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
+				      unsigned int datalen));
+  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
+};
+
+
+/* Declarations for decompression modules */
+
+/* Master control module */
+struct jpeg_decomp_master {
+  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+};
+
+/* Input control module */
+struct jpeg_input_controller {
+  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
+  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean has_multiple_scans;	/* True if file has multiple scans */
+  boolean eoi_reached;		/* True when EOI has been consumed */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_d_main_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
+			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			       JDIMENSION out_rows_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_d_coef_controller {
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
+				 JSAMPIMAGE output_buf));
+  /* Pointer to array of coefficient virtual arrays, or NULL if none */
+  jvirt_barray_ptr *coef_arrays;
+};
+
+/* Decompression postprocessing (color quantization buffer control) */
+struct jpeg_d_post_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
+				    JSAMPIMAGE input_buf,
+				    JDIMENSION *in_row_group_ctr,
+				    JDIMENSION in_row_groups_avail,
+				    JSAMPARRAY output_buf,
+				    JDIMENSION *out_row_ctr,
+				    JDIMENSION out_rows_avail));
+};
+
+/* Marker reading & parsing */
+struct jpeg_marker_reader {
+  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
+  /* Read markers until SOS or EOI.
+   * Returns same codes as are defined for jpeg_consume_input:
+   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+   */
+  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  /* Read a restart marker --- exported for use by entropy decoder only */
+  jpeg_marker_parser_method read_restart_marker;
+
+  /* State of marker reader --- nominally internal, but applications
+   * supplying COM or APPn handlers might like to know the state.
+   */
+  boolean saw_SOI;		/* found SOI? */
+  boolean saw_SOF;		/* found SOF? */
+  int next_restart_num;		/* next restart number expected (0-7) */
+  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
+};
+
+/* Entropy decoding */
+struct jpeg_entropy_decoder {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+};
+
+/* Inverse DCT (also performs dequantization) */
+typedef JMETHOD(void, inverse_DCT_method_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col));
+
+struct jpeg_inverse_dct {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  /* It is useful to allow each component to have a separate IDCT method. */
+  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
+};
+
+/* Upsampling (note that upsampler must also call color converter) */
+struct jpeg_upsampler {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf,
+			   JDIMENSION *in_row_group_ctr,
+			   JDIMENSION in_row_groups_avail,
+			   JSAMPARRAY output_buf,
+			   JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Colorspace conversion */
+struct jpeg_color_deconverter {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
+				JSAMPIMAGE input_buf, JDIMENSION input_row,
+				JSAMPARRAY output_buf, int num_rows));
+};
+
+/* Color quantization or color precision reduction */
+struct jpeg_color_quantizer {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
+  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
+				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
+				 int num_rows));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
+};
+
+
+/* Definition of range extension bits for decompression processes.
+ * See the comments with prepare_range_limit_table (in jdmaster.c)
+ * for more info.
+ * The recommended default value for normal applications is 2.
+ * Applications with special requirements may use a different value.
+ * For example, Ghostscript wants to use 3 for proper handling of
+ * wacky images with oversize coefficient values.
+ */
+
+#define RANGE_BITS	2
+#define RANGE_CENTER	(CENTERJSAMPLE << RANGE_BITS)
+
+
+/* Miscellaneous useful macros */
+
+#undef MAX
+#define MAX(a,b)	((a) > (b) ? (a) : (b))
+#undef MIN
+#define MIN(a,b)	((a) < (b) ? (a) : (b))
+
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity.  This is correct for typical "arithmetic
+ * shift" instructions that shift in copies of the sign bit.  But some
+ * C compilers implement >> with an unsigned shift.  For these machines you
+ * must define RIGHT_SHIFT_IS_UNSIGNED.
+ * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define SHIFT_TEMPS	INT32 shift_temp;
+#define RIGHT_SHIFT(x,shft)  \
+	((shift_temp = (x)) < 0 ? \
+	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+	 (shift_temp >> (shft)))
+#else
+#define SHIFT_TEMPS
+#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+/* Descale and correctly round an INT32 value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)	RIGHT_SHIFT((x) + ((INT32) 1 << ((n)-1)), n)
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jinit_compress_master	jICompress
+#define jinit_c_master_control	jICMaster
+#define jinit_c_main_controller	jICMainC
+#define jinit_c_prep_controller	jICPrepC
+#define jinit_c_coef_controller	jICCoefC
+#define jinit_color_converter	jICColor
+#define jinit_downsampler	jIDownsampler
+#define jinit_forward_dct	jIFDCT
+#define jinit_huff_encoder	jIHEncoder
+#define jinit_arith_encoder	jIAEncoder
+#define jinit_marker_writer	jIMWriter
+#define jinit_master_decompress	jIDMaster
+#define jinit_d_main_controller	jIDMainC
+#define jinit_d_coef_controller	jIDCoefC
+#define jinit_d_post_controller	jIDPostC
+#define jinit_input_controller	jIInCtlr
+#define jinit_marker_reader	jIMReader
+#define jinit_huff_decoder	jIHDecoder
+#define jinit_arith_decoder	jIADecoder
+#define jinit_inverse_dct	jIIDCT
+#define jinit_upsampler		jIUpsampler
+#define jinit_color_deconverter	jIDColor
+#define jinit_1pass_quantizer	jI1Quant
+#define jinit_2pass_quantizer	jI2Quant
+#define jinit_merged_upsampler	jIMUpsampler
+#define jinit_memory_mgr	jIMemMgr
+#define jdiv_round_up		jDivRound
+#define jround_up		jRound
+#define jzero_far		jZeroFar
+#define jcopy_sample_rows	jCopySamples
+#define jcopy_block_row		jCopyBlocks
+#define jpeg_zigzag_order	jZIGTable
+#define jpeg_natural_order	jZAGTable
+#define jpeg_natural_order7	jZAG7Table
+#define jpeg_natural_order6	jZAG6Table
+#define jpeg_natural_order5	jZAG5Table
+#define jpeg_natural_order4	jZAG4Table
+#define jpeg_natural_order3	jZAG3Table
+#define jpeg_natural_order2	jZAG2Table
+#define jpeg_aritab		jAriTab
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines in jutils.c do it the hard way.
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
+#define FMEMZERO(target,size)	MEMZERO(target,size)
+#else				/* 80x86 case */
+#ifdef USE_FMEM
+#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
+#else
+EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+#define FMEMZERO(target,size)	jzero_far(target, size)
+#endif
+#endif
+
+
+/* Compression module initialization routines */
+EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
+					 boolean transcode_only));
+EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
+/* Decompression module initialization routines */
+EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+/* Memory manager initialization */
+EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
+
+/* Utility routines in jutils.c */
+EXTERN(long) jdiv_round_up JPP((long a, long b));
+EXTERN(long) jround_up JPP((long a, long b));
+EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array,
+				    JSAMPARRAY output_array,
+				    int num_rows, JDIMENSION num_cols));
+EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
+				  JDIMENSION num_blocks));
+/* Constant tables in jutils.c */
+#if 0				/* This table is not actually needed in v6a */
+extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
+#endif
+extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
+extern const int jpeg_natural_order7[]; /* zz to natural order for 7x7 block */
+extern const int jpeg_natural_order6[]; /* zz to natural order for 6x6 block */
+extern const int jpeg_natural_order5[]; /* zz to natural order for 5x5 block */
+extern const int jpeg_natural_order4[]; /* zz to natural order for 4x4 block */
+extern const int jpeg_natural_order3[]; /* zz to natural order for 3x3 block */
+extern const int jpeg_natural_order2[]; /* zz to natural order for 2x2 block */
+
+/* Arithmetic coding probability estimation tables in jaricom.c */
+extern const INT32 jpeg_aritab[];
+
+/* Suppress undefined-structure complaints if necessary. */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+#endif
+#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/cmake/externals/libjpeg/jpeglib.h b/cmake/externals/libjpeg/jpeglib.h
new file mode 100644
index 000000000..e7e15ab2c
--- /dev/null
+++ b/cmake/externals/libjpeg/jpeglib.h
@@ -0,0 +1,1183 @@
+/*
+ * jpeglib.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2002-2022 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the application interface for the JPEG library.
+ * Most applications using the library need only include this file,
+ * and perhaps jerror.h if they want to know the exact error codes.
+ */
+
+#ifndef JPEGLIB_H
+#define JPEGLIB_H
+
+/*
+ * First we include the configuration files that record how this
+ * installation of the JPEG library is set up.  jconfig.h can be
+ * generated automatically for many systems.  jmorecfg.h contains
+ * manual configuration options that most people need not worry about.
+ */
+
+#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
+#include "jconfig.h"		/* widely used configuration options */
+#endif
+#include "jmorecfg.h"		/* seldom changed options */
+
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+extern "C" {
+#endif
+#endif
+
+/* Version IDs for the JPEG library.
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 90".
+ */
+
+#define JPEG_LIB_VERSION        90	/* Compatibility version 9.0 */
+#define JPEG_LIB_VERSION_MAJOR  9
+#define JPEG_LIB_VERSION_MINOR  6
+
+
+/* Various constants determining the sizes of things.
+ * All of these are specified by the JPEG standard,
+ * so don't change them if you want to be compatible.
+ */
+
+#define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
+#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
+ * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
+ * to handle it.  We even let you do this from the jconfig.h file.  However,
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
+ * sometimes emits noncompliant files doesn't mean you should too.
+ */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
+#ifndef D_MAX_BLOCKS_IN_MCU
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
+#endif
+
+
+/* Data structures for images (arrays of samples and of DCT coefficients).
+ * On 80x86 machines, the image arrays are too big for near pointers,
+ * but the pointer arrays can fit in near memory.
+ */
+
+typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+
+typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
+typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+
+typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
+
+
+/* Types for JPEG compression parameters and working tables. */
+
+
+/* DCT coefficient quantization tables. */
+
+typedef struct {
+  /* This array gives the coefficient quantizers in natural array order
+   * (not the zigzag order in which they are stored in a JPEG DQT marker).
+   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
+   */
+  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JQUANT_TBL;
+
+
+/* Huffman coding tables. */
+
+typedef struct {
+  /* These two fields directly represent the contents of a JPEG DHT marker */
+  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
+				/* length k bits; bits[0] is unused */
+  UINT8 huffval[256];		/* The symbols, in order of incr code length */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JHUFF_TBL;
+
+
+/* Basic info about one component (color channel). */
+
+typedef struct {
+  /* These values are fixed over the whole image. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOF marker. */
+  int component_id;		/* identifier for this component (0..255) */
+  int component_index;		/* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;		/* horizontal sampling factor (1..4) */
+  int v_samp_factor;		/* vertical sampling factor (1..4) */
+  int quant_tbl_no;		/* quantization table selector (0..3) */
+  /* These values may vary between scans. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOS marker. */
+  /* The decompressor output side may not use these variables. */
+  int dc_tbl_no;		/* DC entropy table selector (0..3) */
+  int ac_tbl_no;		/* AC entropy table selector (0..3) */
+
+  /* Remaining fields should be treated as private by applications. */
+
+  /* These values are computed during compression or decompression startup: */
+  /* Component's size in DCT blocks.
+   * Any dummy blocks added to complete an MCU are not counted; therefore
+   * these values do not depend on whether a scan is interleaved or not.
+   */
+  JDIMENSION width_in_blocks;
+  JDIMENSION height_in_blocks;
+  /* Size of a DCT block in samples,
+   * reflecting any scaling we choose to apply during the DCT step.
+   * Values from 1 to 16 are supported.
+   * Note that different components may receive different DCT scalings.
+   */
+  int DCT_h_scaled_size;
+  int DCT_v_scaled_size;
+  /* The downsampled dimensions are the component's actual, unpadded number
+   * of samples at the main buffer (preprocessing/compression interface);
+   * DCT scaling is included, so
+   * downsampled_width =
+   *   ceil(image_width * Hi/Hmax * DCT_h_scaled_size/block_size)
+   * and similarly for height.
+   */
+  JDIMENSION downsampled_width;	 /* actual width in samples */
+  JDIMENSION downsampled_height; /* actual height in samples */
+  /* For decompression, in cases where some of the components will be
+   * ignored (eg grayscale output from YCbCr image), we can skip most
+   * computations for the unused components.
+   * For compression, some of the components will need further quantization
+   * scale by factor of 2 after DCT (eg BG_YCC output from normal RGB input).
+   * The field is first set TRUE for decompression, FALSE for compression
+   * in initial_setup, and then adapted in color conversion setup.
+   */
+  boolean component_needed;
+
+  /* These values are computed before starting a scan of the component. */
+  /* The decompressor output side may not use these variables. */
+  int MCU_width;		/* number of blocks per MCU, horizontally */
+  int MCU_height;		/* number of blocks per MCU, vertically */
+  int MCU_blocks;		/* MCU_width * MCU_height */
+  int MCU_sample_width;	/* MCU width in samples: MCU_width * DCT_h_scaled_size */
+  int last_col_width;		/* # of non-dummy blocks across in last MCU */
+  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+
+  /* Saved quantization table for component; NULL if none yet saved.
+   * See jdinput.c comments about the need for this information.
+   * This field is currently used only for decompression.
+   */
+  JQUANT_TBL * quant_table;
+
+  /* Private per-component storage for DCT or IDCT subsystem. */
+  void * dct_table;
+} jpeg_component_info;
+
+
+/* The script for encoding a multiple-scan file is an array of these: */
+
+typedef struct {
+  int comps_in_scan;		/* number of components encoded in this scan */
+  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
+  int Ss, Se;			/* progressive JPEG spectral selection parms */
+  int Ah, Al;			/* progressive JPEG successive approx. parms */
+} jpeg_scan_info;
+
+/* The decompressor can save APPn and COM markers in a list of these: */
+
+typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
+
+struct jpeg_marker_struct {
+  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
+  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length;	/* # bytes of data in the file */
+  unsigned int data_length;	/* # bytes of data saved at data[] */
+  JOCTET FAR * data;		/* the data contained in the marker */
+  /* the marker length word is not counted in data_length or original_length */
+};
+
+/* Known color spaces. */
+
+typedef enum {
+	JCS_UNKNOWN,		/* error/unspecified */
+	JCS_GRAYSCALE,		/* monochrome */
+	JCS_RGB,		/* red/green/blue, standard RGB (sRGB) */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV), standard YCC */
+	JCS_CMYK,		/* C/M/Y/K */
+	JCS_YCCK,		/* Y/Cb/Cr/K */
+	JCS_BG_RGB,		/* big gamut red/green/blue, bg-sRGB */
+	JCS_BG_YCC		/* big gamut Y/Cb/Cr, bg-sYCC */
+} J_COLOR_SPACE;
+
+/* Supported color transforms. */
+
+typedef enum {
+	JCT_NONE           = 0,
+	JCT_SUBTRACT_GREEN = 1
+} J_COLOR_TRANSFORM;
+
+/* DCT/IDCT algorithm options. */
+
+typedef enum {
+	JDCT_ISLOW,		/* slow but accurate integer algorithm */
+	JDCT_IFAST,		/* faster, less accurate integer method */
+	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+} J_DCT_METHOD;
+
+#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#define JDCT_DEFAULT  JDCT_ISLOW
+#endif
+#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#define JDCT_FASTEST  JDCT_IFAST
+#endif
+
+/* Dithering options for decompression. */
+
+typedef enum {
+	JDITHER_NONE,		/* no dithering */
+	JDITHER_ORDERED,	/* simple ordered dither */
+	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+} J_DITHER_MODE;
+
+
+/* Common fields between JPEG compression and decompression master structs. */
+
+#define jpeg_common_fields \
+  struct jpeg_error_mgr * err;	/* Error handler module */\
+  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
+  void * client_data;		/* Available for use by application */\
+  boolean is_decompressor;	/* So common code can tell which is which */\
+  int global_state		/* For checking call sequence validity */
+
+/* Routines that are to be used by both halves of the library are declared
+ * to receive a pointer to this structure.  There are no actual instances of
+ * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
+ */
+struct jpeg_common_struct {
+  jpeg_common_fields;		/* Fields common to both master struct types */
+  /* Additional fields follow in an actual jpeg_compress_struct or
+   * jpeg_decompress_struct.  All three structs must agree on these
+   * initial fields!  (This would be a lot cleaner in C++.)
+   */
+};
+
+typedef struct jpeg_common_struct * j_common_ptr;
+typedef struct jpeg_compress_struct * j_compress_ptr;
+typedef struct jpeg_decompress_struct * j_decompress_ptr;
+
+
+/* Master record for a compression instance */
+
+struct jpeg_compress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+
+  /* Destination for compressed data */
+  struct jpeg_destination_mgr * dest;
+
+  /* Description of source image --- these fields must be filled in by
+   * outer application before starting compression.  in_color_space must
+   * be correct before you can even call jpeg_set_defaults().
+   */
+
+  JDIMENSION image_width;	/* input image width */
+  JDIMENSION image_height;	/* input image height */
+  int input_components;		/* # of color components in input image */
+  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+
+  double input_gamma;		/* image gamma of input image */
+
+  /* Compression parameters --- these fields must be set before calling
+   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
+   * initialize everything to reasonable defaults, then changing anything
+   * the application specifically wants to change.  That way you won't get
+   * burnt when new parameters are added.  Also note that there are several
+   * helper routines to simplify changing parameters.
+   */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  JDIMENSION jpeg_width;	/* scaled JPEG image width */
+  JDIMENSION jpeg_height;	/* scaled JPEG image height */
+  /* Dimensions of actual JPEG image that will be written to file,
+   * derived from input dimensions by scaling factors above.
+   * These fields are computed by jpeg_start_compress().
+   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
+   * in advance of calling jpeg_start_compress().
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  int q_scale_factor[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined,
+   * and corresponding scale factors (percentage, initialized 100).
+   */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  int num_scans;		/* # of entries in scan_info array */
+  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
+  /* The default value of scan_info is NULL, which causes a single-scan
+   * sequential JPEG file to be emitted.  To create a multi-scan file,
+   * set num_scans and scan_info to point to an array of scan definitions.
+   */
+
+  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
+  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+
+  /* The restart interval can be specified in absolute MCUs by setting
+   * restart_interval, or in MCU rows by setting restart_in_rows
+   * (in which case the correct restart_interval will be figured
+   * for each scan).
+   */
+  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
+  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+
+  /* Parameters controlling emission of special markers. */
+
+  boolean write_JFIF_header;	/* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  UINT8 JFIF_minor_version;
+  /* These three values are not used by the JPEG code, merely copied */
+  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
+  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
+  /* ratio is defined by X_density/Y_density even when density_unit=0. */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
+
+  J_COLOR_TRANSFORM color_transform;
+  /* Color transform identifier, writes LSE marker if nonzero */
+
+  /* State variable: index of next scanline to be written to
+   * jpeg_write_scanlines().  Application may use this to control its
+   * processing loop, e.g., "while (next_scanline < image_height)".
+   */
+
+  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
+
+  /* Remaining fields are known throughout compressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during compression startup
+   */
+  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+  /* The coefficient controller receives data in units of MCU rows as defined
+   * for fully interleaved scans (whether the JPEG file is interleaved or not).
+   * There are v_samp_factor * DCT_v_scaled_size sample rows of each component
+   * in an "iMCU" (interleaved MCU) row.
+   */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  int block_size;		/* the basic DCT block size: 1..16 */
+  const int * natural_order;	/* natural-order position array */
+  int lim_Se;			/* min( Se, DCTSIZE2-1 ) */
+
+  /*
+   * Links to compression subobjects (methods and private variables of modules)
+   */
+  struct jpeg_comp_master * master;
+  struct jpeg_c_main_controller * main;
+  struct jpeg_c_prep_controller * prep;
+  struct jpeg_c_coef_controller * coef;
+  struct jpeg_marker_writer * marker;
+  struct jpeg_color_converter * cconvert;
+  struct jpeg_downsampler * downsample;
+  struct jpeg_forward_dct * fdct;
+  struct jpeg_entropy_encoder * entropy;
+  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
+  int script_space_size;
+};
+
+
+/* Master record for a decompression instance */
+
+struct jpeg_decompress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+
+  /* Source of compressed data */
+  struct jpeg_source_mgr * src;
+
+  /* Basic description of image --- filled in by jpeg_read_header(). */
+  /* Application may inspect these values to decide how to process image. */
+
+  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
+  JDIMENSION image_height;	/* nominal image height */
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  /* Decompression processing parameters --- these fields must be set before
+   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
+   * them to default values.
+   */
+
+  J_COLOR_SPACE out_color_space; /* colorspace for output */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  double output_gamma;		/* image gamma wanted in output */
+
+  boolean buffered_image;	/* TRUE=multiple output passes */
+  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+
+  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
+  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+
+  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  /* the following are ignored if not quantize_colors: */
+  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
+  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
+  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  /* these are significant only in buffered-image mode: */
+  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_external_quant;/* enable future use of external colormap */
+  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+
+  /* Description of actual output image that will be returned to application.
+   * These fields are computed by jpeg_start_decompress().
+   * You can also use jpeg_calc_output_dimensions() to determine these values
+   * in advance of calling jpeg_start_decompress().
+   */
+
+  JDIMENSION output_width;	/* scaled image width */
+  JDIMENSION output_height;	/* scaled image height */
+  int out_color_components;	/* # of color components in out_color_space */
+  int output_components;	/* # of color components returned */
+  /* output_components is 1 (a colormap index) when quantizing colors;
+   * otherwise it equals out_color_components.
+   */
+  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
+   * high, space and time will be wasted due to unnecessary data copying.
+   * Usually rec_outbuf_height will be 1 or 2, at most 4.
+   */
+
+  /* When quantizing colors, the output colormap is described by these fields.
+   * The application can supply a colormap by setting colormap non-NULL before
+   * calling jpeg_start_decompress; otherwise a colormap is created during
+   * jpeg_start_decompress or jpeg_start_output.
+   * The map has out_color_components rows and actual_number_of_colors columns.
+   */
+  int actual_number_of_colors;	/* number of entries in use */
+  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+
+  /* State variables: these variables indicate the progress of decompression.
+   * The application may examine these but must not modify them.
+   */
+
+  /* Row index of next scanline to be read from jpeg_read_scanlines().
+   * Application may use this to control its processing loop, e.g.,
+   * "while (output_scanline < output_height)".
+   */
+  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
+
+  /* Current input scan number and number of iMCU rows completed in scan.
+   * These indicate the progress of the decompressor input side.
+   */
+  int input_scan_number;	/* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+
+  /* The "output scan number" is the notional scan being displayed by the
+   * output side.  The decompressor will not allow output scan/row number
+   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
+   */
+  int output_scan_number;	/* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+
+  /* Current progression status.  coef_bits[c][i] indicates the precision
+   * with which component c's DCT coefficient i (in zigzag order) is known.
+   * It is -1 when no data has yet been received, otherwise it is the point
+   * transform (shift) value for the most recent scan of the coefficient
+   * (thus, 0 at completion of the progression).
+   * This pointer is NULL when reading a non-progressive file.
+   */
+  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+
+  /* Internal JPEG parameters --- the application usually need not look at
+   * these fields.  Note that the decompressor output side may not use
+   * any parameters that can change between scans.
+   */
+
+  /* Quantization and Huffman tables are carried forward across input
+   * datastreams when processing abbreviated JPEG datastreams.
+   */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  /* These parameters are never carried across datastreams, since they
+   * are given in SOF/SOS markers or defined to be reset by SOI.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  boolean is_baseline;		/* TRUE if Baseline SOF0 encountered */
+  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
+
+  /* These fields record data obtained from optional markers recognized by
+   * the JPEG library.
+   */
+  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
+  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_minor_version;
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+
+  J_COLOR_TRANSFORM color_transform;
+  /* Color transform identifier derived from LSE marker, otherwise zero */
+
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+
+  /* Aside from the specific data retained from APPn markers known to the
+   * library, the uninterpreted contents of any or all APPn and COM markers
+   * can be saved in a list for examination by the application.
+   */
+  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
+
+  /* Remaining fields are known throughout decompressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during decompression startup
+   */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  /* The coefficient controller's input and output progress is measured in
+   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
+   * in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
+   * rows of each component.  Therefore, the IDCT output contains
+   * v_samp_factor * DCT_v_scaled_size sample rows of a component per iMCU row.
+   */
+
+  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   * Note that the decompressor output side must not use these fields.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /* These fields are derived from Se of first SOS marker.
+   */
+  int block_size;		/* the basic DCT block size: 1..16 */
+  const int * natural_order; /* natural-order position array for entropy decode */
+  int lim_Se;			/* min( Se, DCTSIZE2-1 ) for entropy decode */
+
+  /* This field is shared between entropy decoder and marker parser.
+   * It is either zero or the code of a JPEG marker that has been
+   * read from the data source, but has not yet been processed.
+   */
+  int unread_marker;
+
+  /*
+   * Links to decompression subobjects (methods, private variables of modules)
+   */
+  struct jpeg_decomp_master * master;
+  struct jpeg_d_main_controller * main;
+  struct jpeg_d_coef_controller * coef;
+  struct jpeg_d_post_controller * post;
+  struct jpeg_input_controller * inputctl;
+  struct jpeg_marker_reader * marker;
+  struct jpeg_entropy_decoder * entropy;
+  struct jpeg_inverse_dct * idct;
+  struct jpeg_upsampler * upsample;
+  struct jpeg_color_deconverter * cconvert;
+  struct jpeg_color_quantizer * cquantize;
+};
+
+
+/* "Object" declarations for JPEG modules that may be supplied or called
+ * directly by the surrounding application.
+ * As with all objects in the JPEG library, these structs only define the
+ * publicly visible methods and state variables of a module.  Additional
+ * private fields may exist after the public ones.
+ */
+
+
+/* Error handler object */
+
+struct jpeg_error_mgr {
+  /* Error exit handler: does not return to caller */
+  JMETHOD(noreturn_t, error_exit, (j_common_ptr cinfo));
+  /* Conditionally emit a trace or warning message */
+  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
+  /* Routine that actually outputs a trace or error message */
+  JMETHOD(void, output_message, (j_common_ptr cinfo));
+  /* Format a message string for the most recent JPEG error or message */
+  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
+#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+  /* Reset error state variables at start of a new image */
+  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
+
+  /* The message ID code and any parameters are saved here.
+   * A message can have one string parameter or up to 8 int parameters.
+   */
+  int msg_code;
+#define JMSG_STR_PARM_MAX  80
+  union {
+    int i[8];
+    char s[JMSG_STR_PARM_MAX];
+  } msg_parm;
+
+  /* Standard state variables for error facility */
+
+  int trace_level;		/* max msg_level that will be displayed */
+
+  /* For recoverable corrupt-data errors, we emit a warning message,
+   * but keep going unless emit_message chooses to abort.  emit_message
+   * should count warnings in num_warnings.  The surrounding application
+   * can check for bad data by seeing if num_warnings is nonzero at the
+   * end of processing.
+   */
+  long num_warnings;		/* number of corrupt-data warnings */
+
+  /* These fields point to the table(s) of error message strings.
+   * An application can change the table pointer to switch to a different
+   * message list (typically, to change the language in which errors are
+   * reported).  Some applications may wish to add additional error codes
+   * that will be handled by the JPEG library error mechanism; the second
+   * table pointer is used for this purpose.
+   *
+   * First table includes all errors generated by JPEG library itself.
+   * Error code 0 is reserved for a "no such error string" message.
+   */
+  const char * const * jpeg_message_table; /* Library errors */
+  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
+  /* Second table can be added by application (see cjpeg/djpeg for example).
+   * It contains strings numbered first_addon_message..last_addon_message.
+   */
+  const char * const * addon_message_table; /* Non-library errors */
+  int first_addon_message;	/* code for first string in addon table */
+  int last_addon_message;	/* code for last string in addon table */
+};
+
+
+/* Progress monitor object */
+
+struct jpeg_progress_mgr {
+  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
+
+  long pass_counter;		/* work units completed in this pass */
+  long pass_limit;		/* total number of work units in this pass */
+  int completed_passes;		/* passes completed so far */
+  int total_passes;		/* total number of passes expected */
+};
+
+
+/* Data destination object for compression */
+
+struct jpeg_destination_mgr {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+
+  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
+  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
+  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
+};
+
+
+/* Data source object for decompression */
+
+struct jpeg_source_mgr {
+  const JOCTET * next_input_byte; /* => next byte to read from buffer */
+  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+
+  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
+  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
+  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
+  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+};
+
+
+/* Memory manager object.
+ * Allocates "small" objects (a few K total), "large" objects (tens of K),
+ * and "really big" objects (virtual arrays with backing store if needed).
+ * The memory manager does not allow individual objects to be freed; rather,
+ * each created object is assigned to a pool, and whole pools can be freed
+ * at once.  This is faster and more convenient than remembering exactly what
+ * to free, especially where malloc()/free() are not too speedy.
+ * NB: alloc routines never return NULL.  They exit to error_exit if not
+ * successful.
+ */
+
+#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
+#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS	2
+
+typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
+typedef struct jvirt_barray_control * jvirt_barray_ptr;
+
+
+struct jpeg_memory_mgr {
+  /* Method pointers */
+  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
+				size_t sizeofobject));
+  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
+				     size_t sizeofobject));
+  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
+				     JDIMENSION samplesperrow,
+				     JDIMENSION numrows));
+  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
+				      JDIMENSION blocksperrow,
+				      JDIMENSION numrows));
+  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION samplesperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION blocksperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
+  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
+					   jvirt_sarray_ptr ptr,
+					   JDIMENSION start_row,
+					   JDIMENSION num_rows,
+					   boolean writable));
+  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
+					    jvirt_barray_ptr ptr,
+					    JDIMENSION start_row,
+					    JDIMENSION num_rows,
+					    boolean writable));
+  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
+  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
+
+  /* Limit on memory allocation for this JPEG object.  (Note that this is
+   * merely advisory, not a guaranteed maximum; it only affects the space
+   * used for virtual-array buffers.)  May be changed by outer application
+   * after creating the JPEG object.
+   */
+  long max_memory_to_use;
+
+  /* Maximum allocation request accepted by alloc_large. */
+  long max_alloc_chunk;
+};
+
+
+/* Routine signature for application-supplied marker processing methods.
+ * Need not pass marker code since it is stored in cinfo->unread_marker.
+ */
+typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
+
+
+/* Declarations for routines called by application.
+ * The JPP macro hides prototype parameters from compilers that can't cope.
+ * Note JPP requires double parentheses.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JPP(arglist)	arglist
+#else
+#define JPP(arglist)	()
+#endif
+
+
+/* Short forms of external names for systems with brain-damaged linkers.
+ * We shorten external names to be unique in the first six letters, which
+ * is good enough for all known systems.
+ * (If your compiler itself needs names to be unique in less than 15 
+ * characters, you are out of luck.  Get a better compiler.)
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_error		jStdError
+#define jpeg_CreateCompress	jCreaCompress
+#define jpeg_CreateDecompress	jCreaDecompress
+#define jpeg_destroy_compress	jDestCompress
+#define jpeg_destroy_decompress	jDestDecompress
+#define jpeg_stdio_dest		jStdDest
+#define jpeg_stdio_src		jStdSrc
+#define jpeg_mem_dest		jMemDest
+#define jpeg_mem_src		jMemSrc
+#define jpeg_set_defaults	jSetDefaults
+#define jpeg_set_colorspace	jSetColorspace
+#define jpeg_default_colorspace	jDefColorspace
+#define jpeg_set_quality	jSetQuality
+#define jpeg_set_linear_quality	jSetLQuality
+#define jpeg_default_qtables	jDefQTables
+#define jpeg_add_quant_table	jAddQuantTable
+#define jpeg_quality_scaling	jQualityScaling
+#define jpeg_simple_progression	jSimProgress
+#define jpeg_suppress_tables	jSuppressTables
+#define jpeg_alloc_quant_table	jAlcQTable
+#define jpeg_alloc_huff_table	jAlcHTable
+#define jpeg_std_huff_table	jStdHTable
+#define jpeg_start_compress	jStrtCompress
+#define jpeg_write_scanlines	jWrtScanlines
+#define jpeg_finish_compress	jFinCompress
+#define jpeg_calc_jpeg_dimensions	jCjpegDimensions
+#define jpeg_write_raw_data	jWrtRawData
+#define jpeg_write_marker	jWrtMarker
+#define jpeg_write_m_header	jWrtMHeader
+#define jpeg_write_m_byte	jWrtMByte
+#define jpeg_write_tables	jWrtTables
+#define jpeg_read_header	jReadHeader
+#define jpeg_start_decompress	jStrtDecompress
+#define jpeg_read_scanlines	jReadScanlines
+#define jpeg_finish_decompress	jFinDecompress
+#define jpeg_read_raw_data	jReadRawData
+#define jpeg_has_multiple_scans	jHasMultScn
+#define jpeg_start_output	jStrtOutput
+#define jpeg_finish_output	jFinOutput
+#define jpeg_input_complete	jInComplete
+#define jpeg_new_colormap	jNewCMap
+#define jpeg_consume_input	jConsumeInput
+#define jpeg_core_output_dimensions	jCoreDimensions
+#define jpeg_calc_output_dimensions	jCalcDimensions
+#define jpeg_save_markers	jSaveMarkers
+#define jpeg_set_marker_processor	jSetMarker
+#define jpeg_read_coefficients	jReadCoefs
+#define jpeg_write_coefficients	jWrtCoefs
+#define jpeg_copy_critical_parameters	jCopyCrit
+#define jpeg_abort_compress	jAbrtCompress
+#define jpeg_abort_decompress	jAbrtDecompress
+#define jpeg_abort		jAbort
+#define jpeg_destroy		jDestroy
+#define jpeg_resync_to_restart	jResyncRestart
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Default error-management setup */
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error
+	JPP((struct jpeg_error_mgr * err));
+
+/* Initialization of JPEG compression objects.
+ * jpeg_create_compress() and jpeg_create_decompress() are the exported
+ * names that applications should call.  These expand to calls on
+ * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
+ * passed for version mismatch checking.
+ * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
+ */
+#define jpeg_create_compress(cinfo) \
+    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+			(size_t) sizeof(struct jpeg_compress_struct))
+#define jpeg_create_decompress(cinfo) \
+    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+			  (size_t) sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
+				      int version, size_t structsize));
+EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
+					int version, size_t structsize));
+/* Destruction of JPEG compression objects */
+EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
+
+/* Standard data source and destination managers: stdio streams. */
+/* Caller is responsible for opening the file before and closing after. */
+EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
+EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
+
+/* Data source and destination managers: memory buffers. */
+EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
+			       unsigned char ** outbuffer,
+			       size_t * outsize));
+EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
+			      const unsigned char * inbuffer,
+			      size_t insize));
+
+/* Default parameter setup for compression */
+EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
+/* Compression parameter setup aids */
+EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
+				      J_COLOR_SPACE colorspace));
+EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
+				   boolean force_baseline));
+EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
+					  int scale_factor,
+					  boolean force_baseline));
+EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
+				       boolean force_baseline));
+EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
+				       const unsigned int *basic_table,
+				       int scale_factor,
+				       boolean force_baseline));
+EXTERN(int) jpeg_quality_scaling JPP((int quality));
+EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
+				       boolean suppress));
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
+EXTERN(JHUFF_TBL *) jpeg_std_huff_table JPP((j_common_ptr cinfo,
+					     boolean isDC, int tblno));
+
+/* Main entry points for compression */
+EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
+				      boolean write_all_tables));
+EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
+					     JSAMPARRAY scanlines,
+					     JDIMENSION num_lines));
+EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+
+/* Precalculate JPEG dimensions for current compression parameters. */
+EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
+
+/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
+					    JSAMPIMAGE data,
+					    JDIMENSION num_lines));
+
+/* Write a special marker.  See libjpeg.txt concerning safe usage. */
+EXTERN(void) jpeg_write_marker
+	JPP((j_compress_ptr cinfo, int marker,
+	     const JOCTET * dataptr, unsigned int datalen));
+/* Same, but piecemeal. */
+EXTERN(void) jpeg_write_m_header
+	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
+EXTERN(void) jpeg_write_m_byte
+	JPP((j_compress_ptr cinfo, int val));
+
+/* Alternate compression function: just write an abbreviated table file */
+EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
+
+/* Decompression startup: read start of JPEG datastream to see what's there */
+EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
+				  boolean require_image));
+/* Return value is one of: */
+#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK		1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+/* If you pass require_image = TRUE (normal case), you need not check for
+ * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
+ * JPEG_SUSPENDED is only possible if you use a data source module that can
+ * give a suspension return (the stdio source module doesn't).
+ */
+
+/* Main entry points for decompression */
+EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    JDIMENSION max_lines));
+EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
+
+/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
+					   JSAMPIMAGE data,
+					   JDIMENSION max_lines));
+
+/* Additional entry points for buffered-image mode. */
+EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
+				       int scan_number));
+EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
+EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
+/* Return value is one of: */
+/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI	2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+
+/* Precalculate output dimensions for current decompression parameters. */
+EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
+
+/* Control saving of COM and APPn markers into marker_list. */
+EXTERN(void) jpeg_save_markers
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     unsigned int length_limit));
+
+/* Install a special processing method for COM or APPn markers. */
+EXTERN(void) jpeg_set_marker_processor
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     jpeg_marker_parser_method routine));
+
+/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
+					  jvirt_barray_ptr * coef_arrays));
+EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
+						j_compress_ptr dstinfo));
+
+/* If you choose to abort compression or decompression before completing
+ * jpeg_finish_(de)compress, then you need to clean up to release memory,
+ * temporary files, etc.  You can just call jpeg_destroy_(de)compress
+ * if you're done with the JPEG object, but if you want to clean it up and
+ * reuse it, call this:
+ */
+EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
+
+/* Generic versions of jpeg_abort and jpeg_destroy that work on either
+ * flavor of JPEG object.  These may be more convenient in some places.
+ */
+EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
+
+/* Default restart-marker-resync procedure for use by data source modules */
+EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
+					    int desired));
+
+
+/* These marker codes are exported since applications and data source modules
+ * are likely to want to use them.
+ */
+
+#define JPEG_RST0	0xD0	/* RST0 marker code */
+#define JPEG_EOI	0xD9	/* EOI marker code */
+#define JPEG_APP0	0xE0	/* APP0 marker code */
+#define JPEG_COM	0xFE	/* COM marker code */
+
+
+/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
+ * for structure definitions that are never filled in, keep it quiet by
+ * supplying dummy definitions for the various substructures.
+ */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+struct jpeg_comp_master { long dummy; };
+struct jpeg_c_main_controller { long dummy; };
+struct jpeg_c_prep_controller { long dummy; };
+struct jpeg_c_coef_controller { long dummy; };
+struct jpeg_marker_writer { long dummy; };
+struct jpeg_color_converter { long dummy; };
+struct jpeg_downsampler { long dummy; };
+struct jpeg_forward_dct { long dummy; };
+struct jpeg_entropy_encoder { long dummy; };
+struct jpeg_decomp_master { long dummy; };
+struct jpeg_d_main_controller { long dummy; };
+struct jpeg_d_coef_controller { long dummy; };
+struct jpeg_d_post_controller { long dummy; };
+struct jpeg_input_controller { long dummy; };
+struct jpeg_marker_reader { long dummy; };
+struct jpeg_entropy_decoder { long dummy; };
+struct jpeg_inverse_dct { long dummy; };
+struct jpeg_upsampler { long dummy; };
+struct jpeg_color_deconverter { long dummy; };
+struct jpeg_color_quantizer { long dummy; };
+#endif /* JPEG_INTERNALS */
+#endif /* INCOMPLETE_TYPES_BROKEN */
+
+
+/*
+ * The JPEG library modules define JPEG_INTERNALS before including this file.
+ * The internal structure declarations are read only when that is true.
+ * Applications using the library should not include jpegint.h, but may wish
+ * to include jerror.h.
+ */
+
+#ifdef JPEG_INTERNALS
+#include "jpegint.h"		/* fetch private declarations */
+#include "jerror.h"		/* fetch error codes too */
+#endif
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+}
+#endif
+#endif
+
+#endif /* JPEGLIB_H */
diff --git a/cmake/externals/libjpeg/jquant1.c b/cmake/externals/libjpeg/jquant1.c
new file mode 100644
index 000000000..60b1843e7
--- /dev/null
+++ b/cmake/externals/libjpeg/jquant1.c
@@ -0,0 +1,851 @@
+/*
+ * jquant1.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 1-pass color quantization (color mapping) routines.
+ * These routines provide mapping to a fixed color map using equally spaced
+ * color values.  Optional Floyd-Steinberg or ordered dithering is available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_1PASS_SUPPORTED
+
+
+/*
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
+ * high quality, colormapped output capability.  A 2-pass quantizer usually
+ * gives better visual quality; however, for quantized grayscale output this
+ * quantizer is perfectly adequate.  Dithering is highly recommended with this
+ * quantizer, though you can turn it off if you really want to.
+ *
+ * In 1-pass quantization the colormap must be chosen in advance of seeing the
+ * image.  We use a map consisting of all combinations of Ncolors[i] color
+ * values for the i'th component.  The Ncolors[] values are chosen so that
+ * their product, the total number of colors, is no more than that requested.
+ * (In most cases, the product will be somewhat less.)
+ *
+ * Since the colormap is orthogonal, the representative value for each color
+ * component can be determined without considering the other components;
+ * then these indexes can be combined into a colormap index by a standard
+ * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
+ * can be precalculated and stored in the lookup table colorindex[].
+ * colorindex[i][j] maps pixel value j in component i to the nearest
+ * representative value (grid plane) for that component; this index is
+ * multiplied by the array stride for component i, so that the
+ * index of the colormap entry closest to a given pixel value is just
+ *    sum( colorindex[component-number][pixel-component-value] )
+ * Aside from being fast, this scheme allows for variable spacing between
+ * representative values with no additional lookup cost.
+ *
+ * If gamma correction has been applied in color conversion, it might be wise
+ * to adjust the color grid spacing so that the representative colors are
+ * equidistant in linear space.  At this writing, gamma correction is not
+ * implemented by jdcolor, so nothing is done here.
+ */
+
+
+/* Declarations for ordered dithering.
+ *
+ * We use a standard 16x16 ordered dither array.  The basic concept of ordered
+ * dithering is described in many references, for instance Dale Schumacher's
+ * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
+ * "dither" value to the input pixel and then round the result to the nearest
+ * output value.  The dither value is equivalent to (0.5 - threshold) times
+ * the distance between output values.  For ordered dithering, we assume that
+ * the output colors are equally spaced; if not, results will probably be
+ * worse, since the dither may be too much or too little at a given point.
+ *
+ * The normal calculation would be to form pixel value + dither, range-limit
+ * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
+ * We can skip the separate range-limiting step by extending the colorindex
+ * table in both directions.
+ */
+
+#define ODITHER_SIZE  16	/* dimension of dither matrix */
+/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
+
+typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
+typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
+
+static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
+  /* Bayer's order-4 dither array.  Generated by the code given in
+   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
+   * The values in this array must range from 0 to ODITHER_CELLS-1.
+   */
+  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
+  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
+  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
+  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
+  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
+  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
+  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
+  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
+  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
+  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
+  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
+  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
+  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
+  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
+  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
+  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
+};
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array is indexed [component#][position].
+ * We provide (#columns + 2) entries per component; the extra entry at each
+ * end saves us from special-casing the first and last pixels.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+#define MAX_Q_COMPS 4		/* max components I can handle */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Initially allocated colormap is saved here */
+  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
+  int sv_actual;		/* number of entries in use */
+
+  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  /* colorindex[i][j] = index of color closest to pixel value j in component i,
+   * premultiplied as described above.  Since colormap indexes must fit into
+   * JSAMPLEs, the entries of this array will too.
+   */
+  boolean is_padded;		/* is the colorindex padded for odither? */
+
+  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+
+  /* Variables for ordered dithering */
+  int row_index;		/* cur row's vertical index in dither matrix */
+  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Policy-making subroutines for create_colormap and create_colorindex.
+ * These routines determine the colormap to be used.  The rest of the module
+ * only assumes that the colormap is orthogonal.
+ *
+ *  * select_ncolors decides how to divvy up the available colors
+ *    among the components.
+ *  * output_value defines the set of representative values for a component.
+ *  * largest_input_value defines the mapping from input values to
+ *    representative values for a component.
+ * Note that the latter two routines may impose different policies for
+ * different components, though this is not currently done.
+ */
+
+
+LOCAL(int)
+select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
+/* Determine allocation of desired colors to components, */
+/* and fill in Ncolors[] array to indicate choice. */
+/* Return value is total number of colors (product of Ncolors[] values). */
+{
+  int nc = cinfo->out_color_components; /* number of color components */
+  int max_colors = cinfo->desired_number_of_colors;
+  int total_colors, iroot, i, j;
+  boolean changed;
+  long temp;
+  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+
+  /* We can allocate at least the nc'th root of max_colors per component. */
+  /* Compute floor(nc'th root of max_colors). */
+  iroot = 1;
+  do {
+    iroot++;
+    temp = iroot;		/* set temp = iroot ** nc */
+    for (i = 1; i < nc; i++)
+      temp *= iroot;
+  } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
+  iroot--;			/* now iroot = floor(root) */
+
+  /* Must have at least 2 color values per component */
+  if (iroot < 2)
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp);
+
+  /* Initialize to iroot color values for each component */
+  total_colors = 1;
+  for (i = 0; i < nc; i++) {
+    Ncolors[i] = iroot;
+    total_colors *= iroot;
+  }
+  /* We may be able to increment the count for one or more components without
+   * exceeding max_colors, though we know not all can be incremented.
+   * Sometimes, the first component can be incremented more than once!
+   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
+   * In RGB colorspace, try to increment G first, then R, then B.
+   */
+  do {
+    changed = FALSE;
+    for (i = 0; i < nc; i++) {
+      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
+      /* calculate new total_colors if Ncolors[j] is incremented */
+      temp = total_colors / Ncolors[j];
+      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
+      if (temp > (long) max_colors)
+	break;			/* won't fit, done with this pass */
+      Ncolors[j]++;		/* OK, apply the increment */
+      total_colors = (int) temp;
+      changed = TRUE;
+    }
+  } while (changed);
+
+  return total_colors;
+}
+
+
+LOCAL(int)
+output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return j'th output value, where j will range from 0 to maxj */
+/* The output values must fall in 0..MAXJSAMPLE in increasing order */
+{
+  /* We always provide values 0 and MAXJSAMPLE for each component;
+   * any additional values are equally spaced between these limits.
+   * (Forcing the upper and lower values to the limits ensures that
+   * dithering can't produce a color outside the selected gamut.)
+   */
+  return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj);
+}
+
+
+LOCAL(int)
+largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return largest input value that should map to j'th output value */
+/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
+{
+  /* Breakpoints are halfway between values returned by output_value */
+  return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
+}
+
+
+/*
+ * Create the colormap.
+ */
+
+LOCAL(void)
+create_colormap (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colormap;		/* Created colormap */
+  int total_colors;		/* Number of distinct output colors */
+  int i,j,k, nci, blksize, blkdist, ptr, val;
+
+  /* Select number of colors for each component */
+  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
+
+  /* Report selected color counts */
+  if (cinfo->out_color_components == 3)
+    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
+	     total_colors, cquantize->Ncolors[0],
+	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
+  else
+    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
+
+  /* Allocate and fill in the colormap. */
+  /* The colors are ordered in the map in standard row-major order, */
+  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
+
+  colormap = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  /* blkdist is distance between groups of identical entries for a component */
+  blkdist = total_colors;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colormap entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blkdist / nci;
+    for (j = 0; j < nci; j++) {
+      /* Compute j'th output value (out of nci) for component */
+      val = output_value(cinfo, i, j, nci-1);
+      /* Fill in all colormap entries that have this value of this component */
+      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
+	/* fill in blksize entries beginning at ptr */
+	for (k = 0; k < blksize; k++)
+	  colormap[i][ptr+k] = (JSAMPLE) val;
+      }
+    }
+    blkdist = blksize;		/* blksize of this color is blkdist of next */
+  }
+
+  /* Save the colormap in private storage,
+   * where it will survive color quantization mode changes.
+   */
+  cquantize->sv_colormap = colormap;
+  cquantize->sv_actual = total_colors;
+}
+
+
+/*
+ * Create the color index table.
+ */
+
+LOCAL(void)
+create_colorindex (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW indexptr;
+  int i,j,k, nci, blksize, val, pad;
+
+  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
+   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
+   * This is not necessary in the other dithering modes.  However, we
+   * flag whether it was done in case user changes dithering mode.
+   */
+  if (cinfo->dither_mode == JDITHER_ORDERED) {
+    pad = MAXJSAMPLE*2;
+    cquantize->is_padded = TRUE;
+  } else {
+    pad = 0;
+    cquantize->is_padded = FALSE;
+  }
+
+  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) (MAXJSAMPLE+1 + pad),
+     (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  blksize = cquantize->sv_actual;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colorindex entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blksize / nci;
+
+    /* adjust colorindex pointers to provide padding at negative indexes. */
+    if (pad)
+      cquantize->colorindex[i] += MAXJSAMPLE;
+
+    /* in loop, val = index of current output value, */
+    /* and k = largest j that maps to current val */
+    indexptr = cquantize->colorindex[i];
+    val = 0;
+    k = largest_input_value(cinfo, i, 0, nci-1);
+    for (j = 0; j <= MAXJSAMPLE; j++) {
+      while (j > k)		/* advance val if past boundary */
+	k = largest_input_value(cinfo, i, ++val, nci-1);
+      /* premultiply so that no multiplication needed in main processing */
+      indexptr[j] = (JSAMPLE) (val * blksize);
+    }
+    /* Pad at both ends if necessary */
+    if (pad)
+      for (j = 1; j <= MAXJSAMPLE; j++) {
+	indexptr[-j] = indexptr[0];
+	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
+      }
+  }
+}
+
+
+/*
+ * Create an ordered-dither array for a component having ncolors
+ * distinct output values.
+ */
+
+LOCAL(ODITHER_MATRIX_PTR)
+make_odither_array (j_decompress_ptr cinfo, int ncolors)
+{
+  ODITHER_MATRIX_PTR odither;
+  int j,k;
+  INT32 num,den;
+
+  odither = (ODITHER_MATRIX_PTR) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(ODITHER_MATRIX));
+  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
+   * Hence the dither value for the matrix cell with fill order f
+   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
+   * On 16-bit-int machine, be careful to avoid overflow.
+   */
+  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
+  for (j = 0; j < ODITHER_SIZE; j++) {
+    for (k = 0; k < ODITHER_SIZE; k++) {
+      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
+	    * MAXJSAMPLE;
+      /* Ensure round towards zero despite C's lack of consistency
+       * about rounding negative values in integer division...
+       */
+      odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den);
+    }
+  }
+  return odither;
+}
+
+
+/*
+ * Create the ordered-dither tables.
+ * Components having the same number of representative colors may 
+ * share a dither table.
+ */
+
+LOCAL(void)
+create_odither_tables (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  ODITHER_MATRIX_PTR odither;
+  int i, j, nci;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    odither = NULL;		/* search for matching prior component */
+    for (j = 0; j < i; j++) {
+      if (nci == cquantize->Ncolors[j]) {
+	odither = cquantize->odither[j];
+	break;
+      }
+    }
+    if (odither == NULL)	/* need a new table? */
+      odither = make_odither_array(cinfo, nci);
+    cquantize->odither[i] = odither;
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		JSAMPARRAY output_buf, int num_rows)
+/* General case, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colorindex = cquantize->colorindex;
+  register int pixcode, ci;
+  register JSAMPROW ptrin, ptrout;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  register int nc = cinfo->out_color_components;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode = 0;
+      for (ci = 0; ci < nc; ci++) {
+	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+      }
+      *ptrout++ = (JSAMPLE) pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		 JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register int pixcode;
+  register JSAMPROW ptrin, ptrout;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
+      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
+      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
+      *ptrout++ = (JSAMPLE) pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		     JSAMPARRAY output_buf, int num_rows)
+/* General case, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;
+  int * dither;			/* points to active row of dither matrix */
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int nc = cinfo->out_color_components;
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    FMEMZERO((void FAR *) output_buf[row], (size_t) width * SIZEOF(JSAMPLE));
+    row_index = cquantize->row_index;
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      colorindex_ci = cquantize->colorindex[ci];
+      dither = cquantize->odither[ci][row_index];
+      col_index = 0;
+
+      for (col = width; col > 0; col--) {
+	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
+	 * select output value, accumulate into output code for this pixel.
+	 * Range-limiting need not be done explicitly, as we have extended
+	 * the colorindex table to produce the right answers for out-of-range
+	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
+	 * required amount of padding.
+	 */
+	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
+	input_ptr += nc;
+	output_ptr++;
+	col_index = (col_index + 1) & ODITHER_MASK;
+      }
+    }
+    /* Advance row index for next row */
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		      JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register int pixcode;
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int * dither0;		/* points to active row of dither matrix */
+  int * dither1;
+  int * dither2;
+  int row_index, col_index;	/* current indexes into dither matrix */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    row_index = cquantize->row_index;
+    input_ptr = input_buf[row];
+    output_ptr = output_buf[row];
+    dither0 = cquantize->odither[0][row_index];
+    dither1 = cquantize->odither[1][row_index];
+    dither2 = cquantize->odither[2][row_index];
+    col_index = 0;
+
+    for (col = width; col > 0; col--) {
+      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
+					dither0[col_index]]);
+      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
+					dither1[col_index]]);
+      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
+					dither2[col_index]]);
+      *output_ptr++ = (JSAMPLE) pixcode;
+      col_index = (col_index + 1) & ODITHER_MASK;
+    }
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		    JSAMPARRAY output_buf, int num_rows)
+/* General case, with Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register LOCFSERROR cur;	/* current error or pixel value */
+  LOCFSERROR belowerr;		/* error for pixel below cur */
+  LOCFSERROR bpreverr;		/* error for below/prev col */
+  LOCFSERROR bnexterr;		/* error for below/next col */
+  LOCFSERROR delta;
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;
+  JSAMPROW colormap_ci;
+  int pixcode;
+  int nc = cinfo->out_color_components;
+  int dir;			/* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;			/* dir * nc */
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    FMEMZERO((void FAR *) output_buf[row], (size_t) width * SIZEOF(JSAMPLE));
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      if (cquantize->on_odd_row) {
+	/* work right to left in this row */
+	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+	output_ptr += width-1;
+	dir = -1;
+	dirnc = -nc;
+	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+      } else {
+	/* work left to right in this row */
+	dir = 1;
+	dirnc = nc;
+	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+      }
+      colorindex_ci = cquantize->colorindex[ci];
+      colormap_ci = cquantize->sv_colormap[ci];
+      /* Preset error values: no error propagated to first pixel from left */
+      cur = 0;
+      /* and no error propagated to row below yet */
+      belowerr = bpreverr = 0;
+
+      for (col = width; col > 0; col--) {
+	/* cur holds the error propagated from the previous pixel on the
+	 * current line.  Add the error propagated from the previous line
+	 * to form the complete error correction term for this pixel, and
+	 * round the error term (which is expressed * 16) to an integer.
+	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+	 * for either sign of the error value.
+	 * Note: errorptr points to *previous* column's array entry.
+	 */
+	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+	 * The maximum error is +- MAXJSAMPLE; this sets the required size
+	 * of the range_limit array.
+	 */
+	cur += GETJSAMPLE(*input_ptr);
+	cur = GETJSAMPLE(range_limit[cur]);
+	/* Select output value, accumulate into output code for this pixel */
+	pixcode = GETJSAMPLE(colorindex_ci[cur]);
+	*output_ptr += (JSAMPLE) pixcode;
+	/* Compute actual representation error at this pixel */
+	/* Note: we can do this even though we don't have the final */
+	/* pixel code, because the colormap is orthogonal. */
+	cur -= GETJSAMPLE(colormap_ci[pixcode]);
+	/* Compute error fractions to be propagated to adjacent pixels.
+	 * Add these into the running sums, and simultaneously shift the
+	 * next-line error sums left by 1 column.
+	 */
+	bnexterr = cur;
+	delta = cur * 2;
+	cur += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr + cur);
+	cur += delta;		/* form error * 5 */
+	bpreverr = belowerr + cur;
+	belowerr = bnexterr;
+	cur += delta;		/* form error * 7 */
+	/* At this point cur contains the 7/16 error value to be propagated
+	 * to the next pixel on the current line, and all the errors for the
+	 * next line have been shifted over. We are therefore ready to move on.
+	 */
+	input_ptr += dirnc;	/* advance input ptr to next column */
+	output_ptr += dir;	/* advance output ptr to next column */
+	errorptr += dir;	/* advance errorptr to current column */
+      }
+      /* Post-loop cleanup: we must unload the final error value into the
+       * final fserrors[] entry.  Note we need not unload belowerr because
+       * it is for the dummy column before or after the actual array.
+       */
+      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
+    }
+    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
+  }
+}
+
+
+/*
+ * Allocate workspace for Floyd-Steinberg errors.
+ */
+
+LOCAL(void)
+alloc_fs_workspace (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  arraysize = ((size_t) cinfo->output_width + (size_t) 2) * SIZEOF(FSERROR);
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    cquantize->fserrors[i] = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+  }
+}
+
+
+/*
+ * Initialize for one-pass color quantization.
+ */
+
+METHODDEF(void)
+start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  /* Install my colormap. */
+  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->actual_number_of_colors = cquantize->sv_actual;
+
+  /* Initialize for desired dithering mode. */
+  switch (cinfo->dither_mode) {
+  case JDITHER_NONE:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = color_quantize3;
+    else
+      cquantize->pub.color_quantize = color_quantize;
+    break;
+  case JDITHER_ORDERED:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub.color_quantize = quantize3_ord_dither;
+    else
+      cquantize->pub.color_quantize = quantize_ord_dither;
+    cquantize->row_index = 0;	/* initialize state for ordered dither */
+    /* If user changed to ordered dither from another mode,
+     * we must recreate the color index table with padding.
+     * This will cost extra space, but probably isn't very likely.
+     */
+    if (! cquantize->is_padded)
+      create_colorindex(cinfo);
+    /* Create ordered-dither tables if we didn't already. */
+    if (cquantize->odither[0] == NULL)
+      create_odither_tables(cinfo);
+    break;
+  case JDITHER_FS:
+    cquantize->pub.color_quantize = quantize_fs_dither;
+    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
+    /* Allocate Floyd-Steinberg workspace if didn't already. */
+    if (cquantize->fserrors[0] == NULL)
+      alloc_fs_workspace(cinfo);
+    /* Initialize the propagated errors to zero. */
+    arraysize = ((size_t) cinfo->output_width + (size_t) 2) * SIZEOF(FSERROR);
+    for (i = 0; i < cinfo->out_color_components; i++)
+      FMEMZERO((void FAR *) cquantize->fserrors[i], arraysize);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+  }
+}
+
+
+/*
+ * Finish up at the end of the pass.
+ */
+
+METHODDEF(void)
+finish_pass_1_quant (j_decompress_ptr cinfo)
+{
+  /* no work in 1-pass case */
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ * Shouldn't get to this module!
+ */
+
+METHODDEF(void)
+new_color_map_1_quant (j_decompress_ptr cinfo)
+{
+  ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+
+/*
+ * Module initialization routine for 1-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_1pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+
+  cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_cquantizer));
+  cinfo->cquantize = &cquantize->pub;
+  cquantize->pub.start_pass = start_pass_1_quant;
+  cquantize->pub.finish_pass = finish_pass_1_quant;
+  cquantize->pub.new_color_map = new_color_map_1_quant;
+  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
+  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
+
+  /* Make sure my internal arrays won't overflow */
+  if (cinfo->out_color_components > MAX_Q_COMPS)
+    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
+  /* Make sure colormap indexes can be represented by JSAMPLEs */
+  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
+
+  /* Create the colormap and color index table. */
+  create_colormap(cinfo);
+  create_colorindex(cinfo);
+
+  /* Allocate Floyd-Steinberg workspace now if requested.
+   * We do this now since it is FAR storage and may affect the memory
+   * manager's space calculations.  If the user changes to FS dither
+   * mode in a later pass, we will allocate the space then, and will
+   * possibly overrun the max_memory_to_use setting.
+   */
+  if (cinfo->dither_mode == JDITHER_FS)
+    alloc_fs_workspace(cinfo);
+}
+
+#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jquant2.c b/cmake/externals/libjpeg/jquant2.c
new file mode 100644
index 000000000..662b9bcef
--- /dev/null
+++ b/cmake/externals/libjpeg/jquant2.c
@@ -0,0 +1,1311 @@
+/*
+ * jquant2.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2011-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 2-pass color quantization (color mapping) routines.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+
+/*
+ * This module implements the well-known Heckbert paradigm for color
+ * quantization.  Most of the ideas used here can be traced back to
+ * Heckbert's seminal paper
+ *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
+ *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
+ *
+ * In the first pass over the image, we accumulate a histogram showing the
+ * usage count of each possible color.  To keep the histogram to a reasonable
+ * size, we reduce the precision of the input; typical practice is to retain
+ * 5 or 6 bits per color, so that 8 or 4 different input values are counted
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ * 
+ * The second pass over the image maps each input pixel to the closest output
+ * color (optionally after applying a Floyd-Steinberg dithering correction).
+ * This mapping is logically trivial, but making it go fast enough requires
+ * considerable care.
+ *
+ * Heckbert-style quantizers vary a good deal in their policies for choosing
+ * the "largest" box and deciding where to cut it.  The particular policies
+ * used here have proved out well in experimental comparisons, but better ones
+ * may yet be found.
+ *
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
+ */
+
+#define R_SCALE 2		/* scale R distances by this much */
+#define G_SCALE 3		/* scale G distances by this much */
+#define B_SCALE 1		/* and B by this much */
+
+/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
+ * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
+ * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
+ * you'll get compile errors until you extend this logic.  In that case
+ * you'll probably want to tweak the histogram sizes too.
+ */
+
+#if RGB_RED == 0
+#define C0_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 0
+#define C0_SCALE B_SCALE
+#endif
+#if RGB_GREEN == 1
+#define C1_SCALE G_SCALE
+#endif
+#if RGB_RED == 2
+#define C2_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 2
+#define C2_SCALE B_SCALE
+#endif
+
+
+/*
+ * First we have the histogram data structure and routines for creating it.
+ *
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
+ * If you have plenty of memory and cycles, 6 bits all around gives marginally
+ * better results; if you are short of memory, 5 bits all around will save
+ * some space but degrade the results.
+ * To maintain a fully accurate histogram, we'd need to allocate a "long"
+ * (preferably unsigned long) for each cell.  In practice this is overkill;
+ * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
+ * and clamping those that do overflow to the maximum value will give close-
+ * enough results.  This reduces the recommended histogram size from 256Kb
+ * to 128Kb, which is a useful savings on PC-class machines.
+ * (In the second pass the histogram space is re-used for pixel mapping data;
+ * in that capacity, each cell must be able to store zero to the number of
+ * desired colors.  16 bits/cell is plenty for that too.)
+ * Since the JPEG code is intended to run in small memory model on 80x86
+ * machines, we can't just allocate the histogram in one chunk.  Instead
+ * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
+ * on 80x86 machines, the pointer row is in near memory but the actual
+ * arrays are in far memory (same arrangement as we use for image arrays).
+ */
+
+#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
+
+/* These will do the right thing for either R,G,B or B,G,R color order,
+ * but you may not like the results for other color orders.
+ */
+#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+
+/* Number of elements along histogram axes. */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
+
+/* These are the amounts to shift an input value to get a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
+#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
+#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
+
+
+typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
+
+typedef histcell FAR * histptr;	/* for pointers to histogram cells */
+
+typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
+typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
+typedef hist2d * hist3d;	/* type for top-level pointer */
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Space for the eventually created colormap is stashed here */
+  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
+  int desired;			/* desired # of colors = size of colormap */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;		/* pointer to the histogram */
+
+  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;		/* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+  int * error_limiter;		/* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Prescan some rows of pixels.
+ * In this module the prescan simply updates the histogram, which has been
+ * initialized to zeroes by start_pass.
+ * An output_buf parameter is required by the method signature, but no data
+ * is actually output (in fact the buffer controller is probably passing a
+ * NULL pointer).
+ */
+
+METHODDEF(void)
+prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW ptr;
+  register histptr histp;
+  register hist3d histogram = cquantize->histogram;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptr = input_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the histogram */
+      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
+			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+      /* increment, check for overflow and undo increment if so. */
+      if (++(*histp) <= 0)
+	(*histp)--;
+      ptr += 3;
+    }
+  }
+}
+
+
+/*
+ * Next we have the really interesting routines: selection of a colormap
+ * given the completed histogram.
+ * These routines work with a list of "boxes", each representing a rectangular
+ * subset of the input color space (to histogram precision).
+ */
+
+typedef struct {
+  /* The bounds of the box (inclusive); expressed as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* The volume (actually 2-norm) of the box */
+  INT32 volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+
+typedef box * boxptr;
+
+
+LOCAL(boxptr)
+find_biggest_color_pop (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest color population */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register long maxc = 0;
+  boxptr which = NULL;
+  
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->colorcount > maxc && boxp->volume > 0) {
+      which = boxp;
+      maxc = boxp->colorcount;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(boxptr)
+find_biggest_volume (boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest (scaled) volume */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register INT32 maxv = 0;
+  boxptr which = NULL;
+  
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->volume > maxv) {
+      which = boxp;
+      maxv = boxp->volume;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(void)
+update_box (j_decompress_ptr cinfo, boxptr boxp)
+/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
+/* and recompute its volume and population */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  INT32 dist0,dist1,dist2;
+  long ccount;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  if (c0max > c0min)
+    for (c0 = c0min; c0 <= c0max; c0++)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0min = c0min = c0;
+	    goto have_c0min;
+	  }
+      }
+ have_c0min:
+  if (c0max > c0min)
+    for (c0 = c0max; c0 >= c0min; c0--)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0max = c0max = c0;
+	    goto have_c0max;
+	  }
+      }
+ have_c0max:
+  if (c1max > c1min)
+    for (c1 = c1min; c1 <= c1max; c1++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1min = c1min = c1;
+	    goto have_c1min;
+	  }
+      }
+ have_c1min:
+  if (c1max > c1min)
+    for (c1 = c1max; c1 >= c1min; c1--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1max = c1max = c1;
+	    goto have_c1max;
+	  }
+      }
+ have_c1max:
+  if (c2max > c2min)
+    for (c2 = c2min; c2 <= c2max; c2++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2min = c2min = c2;
+	    goto have_c2min;
+	  }
+      }
+ have_c2min:
+  if (c2max > c2min)
+    for (c2 = c2max; c2 >= c2min; c2--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2max = c2max = c2;
+	    goto have_c2max;
+	  }
+      }
+ have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0.
+   * Since the differences are expressed in histogram-cell units,
+   * we have to shift back to JSAMPLE units to get consistent distances;
+   * after which, we scale according to the selected distance scale factors.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
+  
+  /* Now scan remaining volume of box and compute population */
+  ccount = 0;
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++, histp++)
+	if (*histp != 0) {
+	  ccount++;
+	}
+    }
+  boxp->colorcount = ccount;
+}
+
+
+LOCAL(int)
+median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+	    int desired_colors)
+/* Repeatedly select and split the largest box until we have enough boxes */
+{
+  int n,lb;
+  int c0,c1,c2,cmax;
+  register boxptr b1,b2;
+
+  while (numboxes < desired_colors) {
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
+    if (numboxes*2 <= desired_colors) {
+      b1 = find_biggest_color_pop(boxlist, numboxes);
+    } else {
+      b1 = find_biggest_volume(boxlist, numboxes);
+    }
+    if (b1 == NULL)		/* no splittable boxes left! */
+      break;
+    b2 = &boxlist[numboxes];	/* where new box will go */
+    /* Copy the color bounds to the new box. */
+    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
+    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
+    /* Choose which axis to split the box on.
+     * Current algorithm: longest scaled axis.
+     * See notes in update_box about scaling distances.
+     */
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     */
+#if RGB_RED == 0
+    cmax = c1; n = 1;
+    if (c0 > cmax) { cmax = c0; n = 0; }
+    if (c2 > cmax) { n = 2; }
+#else
+    cmax = c1; n = 1;
+    if (c2 > cmax) { cmax = c2; n = 2; }
+    if (c0 > cmax) { n = 0; }
+#endif
+    /* Choose split point along selected axis, and update box bounds.
+     * Current algorithm: split at halfway point.
+     * (Since the box has been shrunk to minimum volume,
+     * any split will produce two nonempty subboxes.)
+     * Note that lb value is max for lower box, so must be < old max.
+     */
+    switch (n) {
+    case 0:
+      lb = (b1->c0max + b1->c0min) / 2;
+      b1->c0max = lb;
+      b2->c0min = lb+1;
+      break;
+    case 1:
+      lb = (b1->c1max + b1->c1min) / 2;
+      b1->c1max = lb;
+      b2->c1min = lb+1;
+      break;
+    case 2:
+      lb = (b1->c2max + b1->c2min) / 2;
+      b1->c2max = lb;
+      b2->c2min = lb+1;
+      break;
+    }
+    /* Update stats for boxes */
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
+    numboxes++;
+  }
+  return numboxes;
+}
+
+
+LOCAL(void)
+compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Compute representative color for a box, put it in colormap[icolor] */
+{
+  /* Current algorithm: mean weighted by pixels (not colors) */
+  /* Note it is important to get the rounding correct! */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  long count;
+  long total = 0;
+  long c0total = 0;
+  long c1total = 0;
+  long c2total = 0;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++) {
+	if ((count = *histp++) != 0) {
+	  total += count;
+	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
+	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
+	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
+	}
+      }
+    }
+  
+  cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
+  cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
+  cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
+}
+
+
+LOCAL(void)
+select_colors (j_decompress_ptr cinfo, int desired_colors)
+/* Master routine for color selection */
+{
+  boxptr boxlist;
+  int numboxes;
+  int i;
+
+  /* Allocate workspace for box list */
+  boxlist = (boxptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
+  /* Initialize one box containing whole space */
+  numboxes = 1;
+  boxlist[0].c0min = 0;
+  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
+  boxlist[0].c1min = 0;
+  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
+  boxlist[0].c2min = 0;
+  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
+  /* Shrink it to actually-used volume and set its statistics */
+  update_box(cinfo, & boxlist[0]);
+  /* Perform median-cut to produce final box list */
+  numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
+  /* Compute the representative color for each box, fill colormap */
+  for (i = 0; i < numboxes; i++)
+    compute_color(cinfo, & boxlist[i], i);
+  cinfo->actual_number_of_colors = numboxes;
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
+}
+
+
+/*
+ * These routines are concerned with the time-critical task of mapping input
+ * colors to the nearest color in the selected colormap.
+ *
+ * We re-use the histogram space as an "inverse color map", essentially a
+ * cache for the results of nearest-color searches.  All colors within a
+ * histogram cell will be mapped to the same colormap entry, namely the one
+ * closest to the cell's center.  This may not be quite the closest entry to
+ * the actual input color, but it's almost as good.  A zero in the cache
+ * indicates we haven't found the nearest color for that cell yet; the array
+ * is cleared to zeroes before starting the mapping pass.  When we find the
+ * nearest color for a cell, its colormap index plus one is recorded in the
+ * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
+ * when they need to use an unfilled entry in the cache.
+ *
+ * Our method of efficiently finding nearest colors is based on the "locally
+ * sorted search" idea described by Heckbert and on the incremental distance
+ * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
+ * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
+ * the distances from a given colormap entry to each cell of the histogram can
+ * be computed quickly using an incremental method: the differences between
+ * distances to adjacent cells themselves differ by a constant.  This allows a
+ * fairly fast implementation of the "brute force" approach of computing the
+ * distance from every colormap entry to every histogram cell.  Unfortunately,
+ * it needs a work array to hold the best-distance-so-far for each histogram
+ * cell (because the inner loop has to be over cells, not colormap entries).
+ * The work array elements have to be INT32s, so the work array would need
+ * 256Kb at our recommended precision.  This is not feasible in DOS machines.
+ *
+ * To get around these problems, we apply Thomas' method to compute the
+ * nearest colors for only the cells within a small subbox of the histogram.
+ * The work array need be only as big as the subbox, so the memory usage
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
+ * approach is that we can apply Heckbert's locality criterion to quickly
+ * eliminate colormap entries that are far away from the subbox; typically
+ * three-fourths of the colormap entries are rejected by Heckbert's criterion,
+ * and we need not compute their distances to individual cells in the subbox.
+ * The speed of this approach is heavily influenced by the subbox size: too
+ * small means too much overhead, too big loses because Heckbert's criterion
+ * can't eliminate as many colormap entries.  Empirically the best subbox
+ * size seems to be about 1/512th of the histogram (1/8th in each direction).
+ *
+ * Thomas' article also describes a refined method which is asymptotically
+ * faster than the brute-force method, but it is also far more complex and
+ * cannot efficiently be applied to small subboxes.  It is therefore not
+ * useful for programs intended to be portable to DOS machines.  On machines
+ * with plenty of memory, filling the whole histogram in one shot with Thomas'
+ * refined method might be faster than the present code --- but then again,
+ * it might not be any faster, and it's certainly more complicated.
+ */
+
+
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
+
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
+
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/*
+ * The next three routines implement inverse colormap filling.  They could
+ * all be folded into one big routine, but splitting them up this way saves
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
+ * and may allow some compilers to produce better code by registerizing more
+ * inner-loop variables.
+ */
+
+LOCAL(int)
+find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		    JSAMPLE colorlist[])
+/* Locate the colormap entries close enough to an update box to be candidates
+ * for the nearest entry to some cell(s) in the update box.  The update box
+ * is specified by the center coordinates of its first cell.  The number of
+ * candidate colormap entries is returned, and their colormap indexes are
+ * placed in colorlist[].
+ * This routine uses Heckbert's "locally sorted search" criterion to select
+ * the colors that need further consideration.
+ */
+{
+  int numcolors = cinfo->actual_number_of_colors;
+  int maxc0, maxc1, maxc2;
+  int centerc0, centerc1, centerc2;
+  int i, x, ncolors;
+  INT32 minmaxdist, min_dist, max_dist, tdist;
+  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+
+  /* Compute true coordinates of update box's upper corner and center.
+   * Actually we compute the coordinates of the center of the upper-corner
+   * histogram cell, which are the upper bounds of the volume we care about.
+   * Note that since ">>" rounds down, the "center" values may be closer to
+   * min than to max; hence comparisons to them must be "<=", not "<".
+   */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+
+  /* For each color in colormap, find:
+   *  1. its minimum squared-distance to any point in the update box
+   *     (zero if color is within update box);
+   *  2. its maximum squared-distance to any point in the update box.
+   * Both of these can be found by considering only the corners of the box.
+   * We save the minimum distance for each color in mindist[];
+   * only the smallest maximum distance is of interest.
+   */
+  minmaxdist = 0x7FFFFFFFL;
+
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    if (x < minc0) {
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else if (x > maxc0) {
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      if (x <= centerc0) {
+	tdist = (x - maxc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      } else {
+	tdist = (x - minc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[1][i]);
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+	tdist = (x - maxc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    x = GETJSAMPLE(cinfo->colormap[2][i]);
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+	tdist = (x - maxc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    mindist[i] = min_dist;	/* save away the results */
+    if (max_dist < minmaxdist)
+      minmaxdist = max_dist;
+  }
+
+  /* Now we know that no cell in the update box is more than minmaxdist
+   * away from some colormap entry.  Therefore, only colors that are
+   * within minmaxdist of some part of the box need be considered.
+   */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (JSAMPLE) i;
+  }
+  return ncolors;
+}
+
+
+LOCAL(void)
+find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+/* Find the closest colormap entry for each cell in the update box,
+ * given the list of candidate colors prepared by find_nearby_colors.
+ * Return the indexes of the closest entries in the bestcolor[] array.
+ * This routine uses Thomas' incremental distance calculation method to
+ * find the distance from a colormap entry to successive cells in the box.
+ */
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  register INT32 * bptr;	/* pointer into bestdist[] array */
+  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
+  INT32 dist0, dist1;		/* initial distance values */
+  register INT32 dist2;		/* current distance in inner loop */
+  INT32 xx0, xx1;		/* distance increments */
+  register INT32 xx2;
+  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;
+  
+  /* For each color selected by find_nearby_colors,
+   * compute its distance to the center of each cell in the box.
+   * If that's less than best-so-far, update best distance and color number.
+   */
+  
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+  
+  for (i = 0; i < numcolors; i++) {
+    icolor = GETJSAMPLE(colorlist[i]);
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    dist0 = inc0*inc0;
+    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    dist0 += inc1*inc1;
+    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    dist0 += inc2*inc2;
+    /* Form the initial difference increments */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
+	dist2 = dist1;
+	xx2 = inc2;
+	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+	  if (dist2 < *bptr) {
+	    *bptr = dist2;
+	    *cptr = (JSAMPLE) icolor;
+	  }
+	  dist2 += xx2;
+	  xx2 += 2 * STEP_C2 * STEP_C2;
+	  bptr++;
+	  cptr++;
+	}
+	dist1 += xx1;
+	xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+LOCAL(void)
+fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
+/* Fill the inverse-colormap entries in the update box that contains */
+/* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
+/* we can fill as many others as we wish.) */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int minc0, minc1, minc2;	/* lower left corner of update box */
+  int ic0, ic1, ic2;
+  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
+  register histptr cachep;	/* pointer into main cache array */
+  /* This array lists the candidate colormap indexes. */
+  JSAMPLE colorlist[MAXNUMCOLORS];
+  int numcolors;		/* number of candidate colors */
+  /* This array holds the actually closest colormap index for each cell. */
+  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Convert cell coordinates to update box ID */
+  c0 >>= BOX_C0_LOG;
+  c1 >>= BOX_C1_LOG;
+  c2 >>= BOX_C2_LOG;
+
+  /* Compute true coordinates of update box's origin corner.
+   * Actually we compute the coordinates of the center of the corner
+   * histogram cell, which are the lower bounds of the volume we care about.
+   */
+  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+  
+  /* Determine which colormap entries are close enough to be candidates
+   * for the nearest entry to some cell in the update box.
+   */
+  numcolors = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
+
+  /* Determine the actually nearest colors. */
+  find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
+		   bestcolor);
+
+  /* Save the best color numbers (plus 1) in the main cache array */
+  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
+  c1 <<= BOX_C1_LOG;
+  c2 <<= BOX_C2_LOG;
+  cptr = bestcolor;
+  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
+    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
+      cachep = & histogram[c0+ic0][c1+ic1][c2];
+      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
+	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
+      }
+    }
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+pass2_no_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register JSAMPROW inptr, outptr;
+  register histptr cachep;
+  register int c0, c1, c2;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the cache */
+      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
+      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
+      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
+      cachep = & histogram[c0][c1][c2];
+      /* If we have not seen this color before, find nearest colormap entry */
+      /* and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, c0,c1,c2);
+      /* Now emit the colormap index for this cell */
+      *outptr++ = (JSAMPLE) (*cachep - 1);
+    }
+  }
+}
+
+
+METHODDEF(void)
+pass2_fs_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  JSAMPROW inptr;		/* => current input pixel */
+  JSAMPROW outptr;		/* => current output pixel */
+  histptr cachep;
+  int dir;			/* +1 or -1 depending on direction */
+  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  int *error_limit = cquantize->error_limiter;
+  JSAMPROW colormap0 = cinfo->colormap[0];
+  JSAMPROW colormap1 = cinfo->colormap[1];
+  JSAMPROW colormap2 = cinfo->colormap[2];
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
+      /* work right to left in this row */
+      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      outptr += width-1;
+      dir = -1;
+      dir3 = -3;
+      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
+    } else {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
+    }
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+
+    for (col = width; col > 0; col--) {
+      /* curN holds the error propagated from the previous pixel on the
+       * current line.  Add the error propagated from the previous line
+       * to form the complete error correction term for this pixel, and
+       * round the error term (which is expressed * 16) to an integer.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.
+       * Note: errorptr points to *previous* column's array entry.
+       */
+      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
+      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
+      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
+      /* Limit the error using transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
+       */
+      cur0 = error_limit[cur0];
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += GETJSAMPLE(inptr[0]);
+      cur1 += GETJSAMPLE(inptr[1]);
+      cur2 += GETJSAMPLE(inptr[2]);
+      cur0 = GETJSAMPLE(range_limit[cur0]);
+      cur1 = GETJSAMPLE(range_limit[cur1]);
+      cur2 = GETJSAMPLE(range_limit[cur2]);
+      /* Index into the cache with adjusted pixel value */
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
+      /* If we have not seen this color before, find nearest colormap */
+      /* entry and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+      /* Now emit the colormap index for this cell */
+      { register int pixcode = *cachep - 1;
+	*outptr = (JSAMPLE) pixcode;
+	/* Compute representation error for this pixel */
+	cur0 -= GETJSAMPLE(colormap0[pixcode]);
+	cur1 -= GETJSAMPLE(colormap1[pixcode]);
+	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+      }
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column.
+       */
+      { register LOCFSERROR bnexterr, delta;
+
+	bnexterr = cur0;	/* Process component 0 */
+	delta = cur0 * 2;
+	cur0 += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
+	cur0 += delta;		/* form error * 5 */
+	bpreverr0 = belowerr0 + cur0;
+	belowerr0 = bnexterr;
+	cur0 += delta;		/* form error * 7 */
+	bnexterr = cur1;	/* Process component 1 */
+	delta = cur1 * 2;
+	cur1 += delta;		/* form error * 3 */
+	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
+	cur1 += delta;		/* form error * 5 */
+	bpreverr1 = belowerr1 + cur1;
+	belowerr1 = bnexterr;
+	cur1 += delta;		/* form error * 7 */
+	bnexterr = cur2;	/* Process component 2 */
+	delta = cur2 * 2;
+	cur2 += delta;		/* form error * 3 */
+	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
+	cur2 += delta;		/* form error * 5 */
+	bpreverr2 = belowerr2 + cur2;
+	belowerr2 = bnexterr;
+	cur2 += delta;		/* form error * 7 */
+      }
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inptr += dir3;		/* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;		/* advance errorptr to current column */
+    }
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR) bpreverr1;
+    errorptr[2] = (FSERROR) bpreverr2;
+  }
+}
+
+
+/*
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
+ */
+
+LOCAL(void)
+init_error_limit (j_decompress_ptr cinfo)
+/* Allocate and fill in the error_limiter table */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int * table;
+  int in, out;
+
+  table = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
+  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+  cquantize->error_limiter = table;
+
+#define STEPSIZE ((MAXJSAMPLE+1)/16)
+  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
+  out = 0;
+  for (in = 0; in < STEPSIZE; in++, out++) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
+  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
+    table[in] = out; table[-in] = -out;
+  }
+  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
+  for (; in <= MAXJSAMPLE; in++) {
+    table[in] = out; table[-in] = -out;
+  }
+#undef STEPSIZE
+}
+
+
+/*
+ * Finish up at the end of each pass.
+ */
+
+METHODDEF(void)
+finish_pass1 (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Select the representative colors and fill in cinfo->colormap */
+  cinfo->colormap = cquantize->sv_colormap;
+  select_colors(cinfo, cquantize->desired);
+  /* Force next pass to zero the color index table */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+METHODDEF(void)
+finish_pass2 (j_decompress_ptr cinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF(void)
+start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int i;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  if (is_pre_scan) {
+    /* Set up method pointers */
+    cquantize->pub.color_quantize = prescan_quantize;
+    cquantize->pub.finish_pass = finish_pass1;
+    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
+  } else {
+    /* Set up method pointers */
+    if (cinfo->dither_mode == JDITHER_FS)
+      cquantize->pub.color_quantize = pass2_fs_dither;
+    else
+      cquantize->pub.color_quantize = pass2_no_dither;
+    cquantize->pub.finish_pass = finish_pass2;
+
+    /* Make sure color count is acceptable */
+    i = cinfo->actual_number_of_colors;
+    if (i < 1)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
+    if (i > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+    if (cinfo->dither_mode == JDITHER_FS) {
+      size_t arraysize = ((size_t) cinfo->output_width + (size_t) 2)
+	* (3 * SIZEOF(FSERROR));
+      /* Allocate Floyd-Steinberg workspace if we didn't already. */
+      if (cquantize->fserrors == NULL)
+	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+      /* Initialize the propagated errors to zero. */
+      FMEMZERO((void FAR *) cquantize->fserrors, arraysize);
+      /* Make the error-limit table if we didn't already. */
+      if (cquantize->error_limiter == NULL)
+	init_error_limit(cinfo);
+      cquantize->on_odd_row = FALSE;
+    }
+
+  }
+  /* Zero the histogram or inverse color map, if necessary */
+  if (cquantize->needs_zeroed) {
+    for (i = 0; i < HIST_C0_ELEMS; i++) {
+      FMEMZERO((void FAR *) histogram[i],
+	       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+    }
+    cquantize->needs_zeroed = FALSE;
+  }
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+METHODDEF(void)
+new_color_map_2_quant (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Reset the inverse color map */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+/*
+ * Module initialization routine for 2-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_2pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+  int i;
+
+  cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_cquantizer));
+  cinfo->cquantize = &cquantize->pub;
+  cquantize->pub.start_pass = start_pass_2_quant;
+  cquantize->pub.new_color_map = new_color_map_2_quant;
+  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
+  cquantize->error_limiter = NULL;
+
+  /* Make sure jdmaster didn't give me a case I can't handle */
+  if (cinfo->out_color_components != 3)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Allocate the histogram/inverse colormap storage */
+  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
+  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
+
+  /* Allocate storage for the completed colormap, if required.
+   * We do this now since it is FAR storage and may affect
+   * the memory manager's space calculations.
+   */
+  if (cinfo->enable_2pass_quant) {
+    /* Make sure color count is acceptable */
+    int desired = cinfo->desired_number_of_colors;
+    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
+    if (desired < 8)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+    /* Make sure colormap indexes can be represented by JSAMPLEs */
+    if (desired > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) desired, (JDIMENSION) 3);
+    cquantize->desired = desired;
+  } else
+    cquantize->sv_colormap = NULL;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* Allocate Floyd-Steinberg workspace if necessary.
+   * This isn't really needed until pass 2, but again it is FAR storage.
+   * Although we will cope with a later change in dither_mode,
+   * we do not promise to honor max_memory_to_use if dither_mode changes.
+   */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       ((size_t) cinfo->output_width + (size_t) 2) * (3 * SIZEOF(FSERROR)));
+    /* Might as well create the error-limiting table too. */
+    init_error_limit(cinfo);
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jutils.c b/cmake/externals/libjpeg/jutils.c
new file mode 100644
index 000000000..31e16dfb5
--- /dev/null
+++ b/cmake/externals/libjpeg/jutils.c
@@ -0,0 +1,224 @@
+/*
+ * jutils.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Modified 2009-2020 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains tables and miscellaneous utility routines needed
+ * for both compression and decompression.
+ * Note we prefix all global names with "j" to minimize conflicts with
+ * a surrounding application.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
+ * of a DCT block read in natural order (left to right, top to bottom).
+ */
+
+#if 0				/* This table is not actually needed in v6a */
+
+const int jpeg_zigzag_order[DCTSIZE2] = {
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+#endif
+
+/*
+ * jpeg_natural_order[i] is the natural-order position of the i'th element
+ * of zigzag order.
+ *
+ * When reading corrupted data, the Huffman decoders could attempt
+ * to reference an entry beyond the end of this array (if the decoded
+ * zero run length reaches past the end of the block).  To prevent
+ * wild stores without adding an inner-loop test, we put some extra
+ * "63"s after the real entries.  This will cause the extra coefficient
+ * to be stored in location 63 of the block, not somewhere random.
+ * The worst case would be a run-length of 15, which means we need 16
+ * fake entries.
+ */
+
+const int jpeg_natural_order[DCTSIZE2+16] = {
+   0,  1,  8, 16,  9,  2,  3, 10,
+  17, 24, 32, 25, 18, 11,  4,  5,
+  12, 19, 26, 33, 40, 48, 41, 34,
+  27, 20, 13,  6,  7, 14, 21, 28,
+  35, 42, 49, 56, 57, 50, 43, 36,
+  29, 22, 15, 23, 30, 37, 44, 51,
+  58, 59, 52, 45, 38, 31, 39, 46,
+  53, 60, 61, 54, 47, 55, 62, 63,
+  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order7[7*7+16] = {
+   0,  1,  8, 16,  9,  2,  3, 10,
+  17, 24, 32, 25, 18, 11,  4,  5,
+  12, 19, 26, 33, 40, 48, 41, 34,
+  27, 20, 13,  6, 14, 21, 28, 35,
+  42, 49, 50, 43, 36, 29, 22, 30,
+  37, 44, 51, 52, 45, 38, 46, 53,
+  54,
+  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order6[6*6+16] = {
+   0,  1,  8, 16,  9,  2,  3, 10,
+  17, 24, 32, 25, 18, 11,  4,  5,
+  12, 19, 26, 33, 40, 41, 34, 27,
+  20, 13, 21, 28, 35, 42, 43, 36,
+  29, 37, 44, 45,
+  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order5[5*5+16] = {
+   0,  1,  8, 16,  9,  2,  3, 10,
+  17, 24, 32, 25, 18, 11,  4, 12,
+  19, 26, 33, 34, 27, 20, 28, 35,
+  36,
+  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order4[4*4+16] = {
+   0,  1,  8, 16,  9,  2,  3, 10,
+  17, 24, 25, 18, 11, 19, 26, 27,
+  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order3[3*3+16] = {
+   0,  1,  8, 16,  9,  2, 10, 17,
+  18,
+  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+const int jpeg_natural_order2[2*2+16] = {
+   0,  1,  8,  9,
+  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63
+};
+
+
+/*
+ * Arithmetic utilities
+ */
+
+GLOBAL(long)
+jdiv_round_up (long a, long b)
+/* Compute a/b rounded up to next integer, ie, ceil(a/b) */
+/* Assumes a >= 0, b > 0 */
+{
+  return (a + b - 1L) / b;
+}
+
+
+GLOBAL(long)
+jround_up (long a, long b)
+/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
+/* Assumes a >= 0, b > 0 */
+{
+  a += b - 1L;
+  return a - (a % b);
+}
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines below do it the hard way.  (The performance cost
+ * is not all that great, because these routines aren't very heavily used.)
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
+#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
+#else				/* 80x86 case, define if we can */
+#ifdef USE_FMEM
+#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
+#else
+/* This function is for use by the FMEMZERO macro defined in jpegint.h.
+ * Do not call this function directly, use the FMEMZERO macro instead.
+ */
+GLOBAL(void)
+jzero_far (void FAR * target, size_t bytestozero)
+/* Zero out a chunk of FAR memory. */
+/* This might be sample-array data, block-array data, or alloc_large data. */
+{
+  register char FAR * ptr = (char FAR *) target;
+  register size_t count;
+
+  for (count = bytestozero; count > 0; count--) {
+    *ptr++ = 0;
+  }
+}
+#endif
+#endif
+
+
+GLOBAL(void)
+jcopy_sample_rows (JSAMPARRAY input_array,
+		   JSAMPARRAY output_array,
+		   int num_rows, JDIMENSION num_cols)
+/* Copy some rows of samples from one place to another.
+ * num_rows rows are copied from *input_array++ to *output_array++;
+ * these areas may overlap for duplication.
+ * The source and destination arrays must be at least as wide as num_cols.
+ */
+{
+  register JSAMPROW inptr, outptr;
+#ifdef FMEMCOPY
+  register size_t count = (size_t) num_cols * SIZEOF(JSAMPLE);
+#else
+  register JDIMENSION count;
+#endif
+  register int row;
+
+  for (row = num_rows; row > 0; row--) {
+    inptr = *input_array++;
+    outptr = *output_array++;
+#ifdef FMEMCOPY
+    FMEMCOPY(outptr, inptr, count);
+#else
+    for (count = num_cols; count > 0; count--)
+      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+#endif
+  }
+}
+
+
+GLOBAL(void)
+jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
+		 JDIMENSION num_blocks)
+/* Copy a row of coefficient blocks from one place to another. */
+{
+#ifdef FMEMCOPY
+  FMEMCOPY(output_row, input_row, (size_t) num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
+#else
+  register JCOEFPTR inptr, outptr;
+  register long count;
+
+  inptr = (JCOEFPTR) input_row;
+  outptr = (JCOEFPTR) output_row;
+  for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) {
+    *outptr++ = *inptr++;
+  }
+#endif
+}
diff --git a/cmake/externals/libjpeg/jversion.h b/cmake/externals/libjpeg/jversion.h
new file mode 100644
index 000000000..df53ef5e5
--- /dev/null
+++ b/cmake/externals/libjpeg/jversion.h
@@ -0,0 +1,14 @@
+/*
+ * jversion.h
+ *
+ * Copyright (C) 1991-2024, Thomas G. Lane, Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#define JVERSION	"9f  14-Jan-2024"
+
+#define JCOPYRIGHT	"Copyright (C) 2024, Thomas G. Lane, Guido Vollbeding"
diff --git a/cmake/externals/libspng/CMakeLists.txt b/cmake/externals/libspng/CMakeLists.txt
new file mode 100644
index 000000000..c277888e5
--- /dev/null
+++ b/cmake/externals/libspng/CMakeLists.txt
@@ -0,0 +1,39 @@
+# ----------------------------------------------------------------------------
+#  CMake file for libspng. See root CMakeLists.txt
+#
+# ----------------------------------------------------------------------------
+
+project(SPNG)
+
+set(CURR_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}")
+set_property(GLOBAL PROPERTY SPNG_INCLUDE_DIR ${CURR_INCLUDE_DIR})
+
+file(GLOB_RECURSE spng_headers RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "*.h")
+file(GLOB_RECURSE spng_sources RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "*.c")
+
+message(STATUS "libspng will be used as PNG codec")
+
+# ----------------------------------------------------------------------------------
+#         Define the library target:
+# ----------------------------------------------------------------------------------
+
+if(MSVC)
+    add_definitions(-D_CRT_SECURE_NO_DEPRECATE)
+endif(MSVC)
+
+add_library(SPNG STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${spng_headers} ${spng_sources})
+target_link_libraries(SPNG ${ZLIB_LIBRARIES})
+
+set_target_properties(SPNG
+        PROPERTIES OUTPUT_NAME SPNG
+        DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+        COMPILE_PDB_NAME SPNG
+        COMPILE_PDB_NAME_DEBUG "SPNG${OPENCV_DEBUG_POSTFIX}"
+        ARCHIVE_OUTPUT_DIRECTORY "${3P_LIBRARY_OUTPUT_PATH}"
+        )
+
+target_compile_definitions(SPNG PUBLIC SPNG_STATIC)
+
+if(ENABLE_SOLUTION_FOLDERS)
+    set_target_properties(SPNG PROPERTIES FOLDER "3rdparty")
+endif()
\ No newline at end of file
diff --git a/cmake/externals/libspng/LICENSE b/cmake/externals/libspng/LICENSE
new file mode 100644
index 000000000..a29d5a2fa
--- /dev/null
+++ b/cmake/externals/libspng/LICENSE
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2018-2023, Randy <randy408@protonmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/cmake/externals/libspng/spng.c b/cmake/externals/libspng/spng.c
new file mode 100644
index 000000000..b22b7110a
--- /dev/null
+++ b/cmake/externals/libspng/spng.c
@@ -0,0 +1,6980 @@
+/* SPDX-License-Identifier: (BSD-2-Clause AND libpng-2.0) */
+
+#define SPNG__BUILD
+
+#include "spng.h"
+
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#define ZLIB_CONST
+
+#ifdef __FRAMAC__
+    #define SPNG_DISABLE_OPT
+    #include "tests/framac_stubs.h"
+#else
+    #ifdef SPNG_USE_MINIZ
+        #include <miniz.h>
+    #else
+        #include <zlib.h>
+    #endif
+#endif
+
+#ifdef SPNG_MULTITHREADING
+    #include <pthread.h>
+#endif
+
+/* Not build options, edit at your own risk! */
+#define SPNG_READ_SIZE (8192)
+#define SPNG_WRITE_SIZE SPNG_READ_SIZE
+#define SPNG_MAX_CHUNK_COUNT (1000)
+
+#define SPNG_TARGET_CLONES(x)
+
+#ifndef SPNG_DISABLE_OPT
+
+    #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+        #define SPNG_X86
+
+        #if defined(__x86_64__) || defined(_M_X64)
+            #define SPNG_X86_64
+        #endif
+
+    #elif defined(__aarch64__) || defined(_M_ARM64) /* || defined(__ARM_NEON) */
+        #define SPNG_ARM /* NOTE: only arm64 builds are tested! */
+    #else
+        #pragma message "disabling SIMD optimizations for unknown target"
+        #define SPNG_DISABLE_OPT
+    #endif
+
+    #if defined(SPNG_X86_64) && defined(SPNG_ENABLE_TARGET_CLONES)
+        #undef SPNG_TARGET_CLONES
+        #define SPNG_TARGET_CLONES(x) __attribute__((target_clones(x)))
+    #else
+        #define SPNG_TARGET_CLONES(x)
+    #endif
+
+    #ifndef SPNG_DISABLE_OPT
+        static void defilter_sub3(size_t rowbytes, unsigned char *row);
+        static void defilter_sub4(size_t rowbytes, unsigned char *row);
+        static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev);
+        static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev);
+        static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev);
+        static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev);
+
+        #if defined(SPNG_ARM)
+        static uint32_t expand_palette_rgba8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width);
+        static uint32_t expand_palette_rgb8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width);
+        #endif
+    #endif
+#endif
+
+#if defined(_MSC_VER)
+    #pragma warning(push)
+    #pragma warning(disable: 4244)
+#endif
+
+#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || defined(__BIG_ENDIAN__)
+    #define SPNG_BIG_ENDIAN
+#else
+    #define SPNG_LITTLE_ENDIAN
+#endif
+
+enum spng_state
+{
+    SPNG_STATE_INVALID = 0,
+    SPNG_STATE_INIT = 1, /* No PNG buffer/stream is set */
+    SPNG_STATE_INPUT, /* Decoder input PNG was set */
+    SPNG_STATE_OUTPUT = SPNG_STATE_INPUT, /* Encoder output was set */
+    SPNG_STATE_IHDR, /* IHDR was read/written */
+    SPNG_STATE_FIRST_IDAT,  /* Encoded up to / reached first IDAT */
+    SPNG_STATE_DECODE_INIT, /* Decoder is ready for progressive reads */
+    SPNG_STATE_ENCODE_INIT = SPNG_STATE_DECODE_INIT,
+    SPNG_STATE_EOI, /* Reached the last scanline/row */
+    SPNG_STATE_LAST_IDAT, /* Reached last IDAT, set at end of decode_image() */
+    SPNG_STATE_AFTER_IDAT, /*  */
+    SPNG_STATE_IEND, /* Reached IEND */
+};
+
+enum spng__internal
+{
+    SPNG__IO_SIGNAL = 1 << 9,
+    SPNG__CTX_FLAGS_ALL = (SPNG_CTX_IGNORE_ADLER32 | SPNG_CTX_ENCODER)
+};
+
+#define SPNG_STR(x) _SPNG_STR(x)
+#define _SPNG_STR(x) #x
+
+#define SPNG_VERSION_STRING SPNG_STR(SPNG_VERSION_MAJOR) "." \
+                            SPNG_STR(SPNG_VERSION_MINOR) "." \
+                            SPNG_STR(SPNG_VERSION_PATCH)
+
+#define SPNG_GET_CHUNK_BOILERPLATE(chunk) \
+    if(ctx == NULL) return 1; \
+    int ret = read_chunks(ctx, 0); \
+    if(ret) return ret; \
+    if(!ctx->stored.chunk) return SPNG_ECHUNKAVAIL; \
+    if(chunk == NULL) return 1
+
+#define SPNG_SET_CHUNK_BOILERPLATE(chunk) \
+    if(ctx == NULL || chunk == NULL) return 1; \
+    if(ctx->data == NULL && !ctx->encode_only) return SPNG_ENOSRC; \
+    int ret = read_chunks(ctx, 0); \
+    if(ret) return ret
+
+/* Determine if the spng_option can be overriden/optimized */
+#define spng__optimize(option) (ctx->optimize_option & (1 << option))
+
+struct spng_subimage
+{
+    uint32_t width;
+    uint32_t height;
+    size_t out_width; /* byte width based on output format */
+    size_t scanline_width;
+};
+
+struct spng_text2
+{
+    int type;
+    char *keyword;
+    char *text;
+
+    size_t text_length;
+
+    uint8_t compression_flag; /* iTXt only */
+    char *language_tag; /* iTXt only */
+    char *translated_keyword; /* iTXt only */
+
+    size_t cache_usage;
+    char user_keyword_storage[80];
+};
+
+struct decode_flags
+{
+    unsigned apply_trns:  1;
+    unsigned apply_gamma: 1;
+    unsigned use_sbit:    1;
+    unsigned indexed:     1;
+    unsigned do_scaling:  1;
+    unsigned interlaced:  1;
+    unsigned same_layout: 1;
+    unsigned zerocopy:    1;
+    unsigned unpack:      1;
+};
+
+struct encode_flags
+{
+    unsigned interlace:      1;
+    unsigned same_layout:    1;
+    unsigned to_bigendian:   1;
+    unsigned progressive:    1;
+    unsigned finalize:       1;
+
+    enum spng_filter_choice filter_choice;
+};
+
+struct spng_chunk_bitfield
+{
+    unsigned ihdr: 1;
+    unsigned plte: 1;
+    unsigned chrm: 1;
+    unsigned iccp: 1;
+    unsigned gama: 1;
+    unsigned sbit: 1;
+    unsigned srgb: 1;
+    unsigned text: 1;
+    unsigned bkgd: 1;
+    unsigned hist: 1;
+    unsigned trns: 1;
+    unsigned phys: 1;
+    unsigned splt: 1;
+    unsigned time: 1;
+    unsigned offs: 1;
+    unsigned exif: 1;
+    unsigned unknown: 1;
+};
+
+/* Packed sample iterator */
+struct spng__iter
+{
+    const uint8_t mask;
+    unsigned shift_amount;
+    const unsigned initial_shift, bit_depth;
+    const unsigned char *samples;
+};
+
+union spng__decode_plte
+{
+    struct spng_plte_entry rgba[256];
+    unsigned char rgb[256 * 3];
+    unsigned char raw[256 * 4];
+    uint32_t align_this;
+};
+
+struct spng__zlib_options
+{
+    int compression_level;
+    int window_bits;
+    int mem_level;
+    int strategy;
+    int data_type;
+};
+
+typedef void spng__undo(spng_ctx *ctx);
+
+struct spng_ctx
+{
+    size_t data_size;
+    size_t bytes_read;
+    size_t stream_buf_size;
+    unsigned char *stream_buf;
+    const unsigned char *data;
+
+    /* User-defined pointers for streaming */
+    spng_read_fn *read_fn;
+    spng_write_fn *write_fn;
+    void *stream_user_ptr;
+
+    /* Used for buffer reads */
+    const unsigned char *png_base;
+    size_t bytes_left;
+    size_t last_read_size;
+
+    /* Used for encoding */
+    int user_owns_out_png;
+    unsigned char *out_png;
+    unsigned char *write_ptr;
+    size_t out_png_size;
+    size_t bytes_encoded;
+
+    /* These are updated by read/write_header()/read_chunk_bytes() */
+    struct spng_chunk current_chunk;
+    uint32_t cur_chunk_bytes_left;
+    uint32_t cur_actual_crc;
+
+    struct spng_alloc alloc;
+
+    enum spng_ctx_flags flags;
+    enum spng_format fmt;
+
+    enum spng_state state;
+
+    unsigned streaming: 1;
+    unsigned internal_buffer: 1; /* encoding to internal buffer */
+
+    unsigned inflate: 1;
+    unsigned deflate: 1;
+    unsigned encode_only: 1;
+    unsigned strict: 1;
+    unsigned discard: 1;
+    unsigned skip_crc: 1;
+    unsigned keep_unknown: 1;
+    unsigned prev_was_idat: 1;
+
+    struct spng__zlib_options image_options;
+    struct spng__zlib_options text_options;
+
+    spng__undo *undo;
+
+    /* input file contains this chunk */
+    struct spng_chunk_bitfield file;
+
+    /* chunk was stored with spng_set_*() */
+    struct spng_chunk_bitfield user;
+
+    /* chunk was stored by reading or with spng_set_*() */
+    struct spng_chunk_bitfield stored;
+
+    /* used to reset the above in case of an error */
+    struct spng_chunk_bitfield prev_stored;
+
+    struct spng_chunk first_idat, last_idat;
+
+    uint32_t max_width, max_height;
+
+    size_t max_chunk_size;
+    size_t chunk_cache_limit;
+    size_t chunk_cache_usage;
+    uint32_t chunk_count_limit;
+    uint32_t chunk_count_total;
+
+    int crc_action_critical;
+    int crc_action_ancillary;
+
+    uint32_t optimize_option;
+
+    struct spng_ihdr ihdr;
+
+    struct spng_plte plte;
+
+    struct spng_chrm_int chrm_int;
+    struct spng_iccp iccp;
+
+    uint32_t gama;
+
+    struct spng_sbit sbit;
+
+    uint8_t srgb_rendering_intent;
+
+    uint32_t n_text;
+    struct spng_text2 *text_list;
+
+    struct spng_bkgd bkgd;
+    struct spng_hist hist;
+    struct spng_trns trns;
+    struct spng_phys phys;
+
+    uint32_t n_splt;
+    struct spng_splt *splt_list;
+
+    struct spng_time time;
+    struct spng_offs offs;
+    struct spng_exif exif;
+
+    uint32_t n_chunks;
+    struct spng_unknown_chunk *chunk_list;
+
+    struct spng_subimage subimage[7];
+
+    z_stream zstream;
+    unsigned char *scanline_buf, *prev_scanline_buf, *row_buf, *filtered_scanline_buf;
+    unsigned char *scanline, *prev_scanline, *row, *filtered_scanline;
+
+    /* based on fmt */
+    size_t image_size; /* may be zero */
+    size_t image_width;
+
+    unsigned bytes_per_pixel; /* derived from ihdr */
+    unsigned pixel_size; /* derived from spng_format+ihdr */
+    int widest_pass;
+    int last_pass; /* last non-empty pass */
+
+    uint16_t *gamma_lut; /* points to either _lut8 or _lut16 */
+    uint16_t *gamma_lut16;
+    uint16_t gamma_lut8[256];
+    unsigned char trns_px[8];
+    union spng__decode_plte decode_plte;
+    struct spng_sbit decode_sb;
+    struct decode_flags decode_flags;
+    struct spng_row_info row_info;
+
+    struct encode_flags encode_flags;
+};
+
+static const uint32_t spng_u32max = INT32_MAX;
+
+static const uint32_t adam7_x_start[7] = { 0, 4, 0, 2, 0, 1, 0 };
+static const uint32_t adam7_y_start[7] = { 0, 0, 4, 0, 2, 0, 1 };
+static const uint32_t adam7_x_delta[7] = { 8, 8, 4, 4, 2, 2, 1 };
+static const uint32_t adam7_y_delta[7] = { 8, 8, 8, 4, 4, 2, 2 };
+
+static const uint8_t spng_signature[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };
+
+static const uint8_t type_ihdr[4] = { 73, 72, 68, 82 };
+static const uint8_t type_plte[4] = { 80, 76, 84, 69 };
+static const uint8_t type_idat[4] = { 73, 68, 65, 84 };
+static const uint8_t type_iend[4] = { 73, 69, 78, 68 };
+
+static const uint8_t type_trns[4] = { 116, 82, 78, 83 };
+static const uint8_t type_chrm[4] = { 99,  72, 82, 77 };
+static const uint8_t type_gama[4] = { 103, 65, 77, 65 };
+static const uint8_t type_iccp[4] = { 105, 67, 67, 80 };
+static const uint8_t type_sbit[4] = { 115, 66, 73, 84 };
+static const uint8_t type_srgb[4] = { 115, 82, 71, 66 };
+static const uint8_t type_text[4] = { 116, 69, 88, 116 };
+static const uint8_t type_ztxt[4] = { 122, 84, 88, 116 };
+static const uint8_t type_itxt[4] = { 105, 84, 88, 116 };
+static const uint8_t type_bkgd[4] = { 98,  75, 71, 68 };
+static const uint8_t type_hist[4] = { 104, 73, 83, 84 };
+static const uint8_t type_phys[4] = { 112, 72, 89, 115 };
+static const uint8_t type_splt[4] = { 115, 80, 76, 84 };
+static const uint8_t type_time[4] = { 116, 73, 77, 69 };
+
+static const uint8_t type_offs[4] = { 111, 70, 70, 115 };
+static const uint8_t type_exif[4] = { 101, 88, 73, 102 };
+
+static inline void *spng__malloc(spng_ctx *ctx,  size_t size)
+{
+    return ctx->alloc.malloc_fn(size);
+}
+
+static inline void *spng__calloc(spng_ctx *ctx, size_t nmemb, size_t size)
+{
+    return ctx->alloc.calloc_fn(nmemb, size);
+}
+
+static inline void *spng__realloc(spng_ctx *ctx, void *ptr, size_t size)
+{
+    return ctx->alloc.realloc_fn(ptr, size);
+}
+
+static inline void spng__free(spng_ctx *ctx, void *ptr)
+{
+    ctx->alloc.free_fn(ptr);
+}
+
+#if defined(SPNG_USE_MINIZ)
+static void *spng__zalloc(void *opaque, size_t items, size_t size)
+#else
+static void *spng__zalloc(void *opaque, uInt items, uInt size)
+#endif
+{
+    spng_ctx *ctx = opaque;
+
+    if(size > SIZE_MAX / items) return NULL;
+
+    size_t len = (size_t)items * size;
+
+    return spng__malloc(ctx, len);
+}
+
+static void spng__zfree(void *opqaue, void *ptr)
+{
+    spng_ctx *ctx = opqaue;
+    spng__free(ctx, ptr);
+}
+
+static inline uint16_t read_u16(const void *src)
+{
+    const unsigned char *data = src;
+
+    return (data[0] & 0xFFU) << 8 | (data[1] & 0xFFU);
+}
+
+static inline uint32_t read_u32(const void *src)
+{
+    const unsigned char *data = src;
+
+    return (data[0] & 0xFFUL) << 24 | (data[1] & 0xFFUL) << 16 |
+           (data[2] & 0xFFUL) << 8  | (data[3] & 0xFFUL);
+}
+
+static inline int32_t read_s32(const void *src)
+{
+    int32_t ret = (int32_t)read_u32(src);
+
+    return ret;
+}
+
+static inline void write_u16(void *dest, uint16_t x)
+{
+    unsigned char *data = dest;
+
+    data[0] = x >> 8;
+    data[1] = x & 0xFF;
+}
+
+static inline void write_u32(void *dest, uint32_t x)
+{
+    unsigned char *data = dest;
+
+    data[0] = (x >> 24);
+    data[1] = (x >> 16) & 0xFF;
+    data[2] = (x >> 8) & 0xFF;
+    data[3] = x & 0xFF;
+}
+
+static inline void write_s32(void *dest, int32_t x)
+{
+    uint32_t n = x;
+    write_u32(dest, n);
+}
+
+/* Returns an iterator for 1,2,4,8-bit samples */
+static struct spng__iter spng__iter_init(unsigned bit_depth, const unsigned char *samples)
+{
+    struct spng__iter iter =
+    {
+        .mask = (uint32_t)(1 << bit_depth) - 1,
+        .shift_amount = 8 - bit_depth,
+        .initial_shift = 8 - bit_depth,
+        .bit_depth = bit_depth,
+        .samples = samples
+    };
+
+    return iter;
+}
+
+/* Returns the current sample unpacked, iterates to the next one */
+static inline uint8_t get_sample(struct spng__iter *iter)
+{
+    uint8_t x = (iter->samples[0] >> iter->shift_amount) & iter->mask;
+
+    iter->shift_amount -= iter->bit_depth;
+
+    if(iter->shift_amount > 7)
+    {
+        iter->shift_amount = iter->initial_shift;
+        iter->samples++;
+    }
+
+    return x;
+}
+
+static void u16_row_to_host(void *row, size_t size)
+{
+    uint16_t *px = row;
+    size_t i, n = size / 2;
+
+    for(i=0; i < n; i++)
+    {
+        px[i] = read_u16(&px[i]);
+    }
+}
+
+static void u16_row_to_bigendian(void *row, size_t size)
+{
+    uint16_t *px = (uint16_t*)row;
+    size_t i, n = size / 2;
+
+    for(i=0; i < n; i++)
+    {
+        write_u16(&px[i], px[i]);
+    }
+}
+
+static void rgb8_row_to_rgba8(const unsigned char *row, unsigned char *out, uint32_t n)
+{
+    uint32_t i;
+    for(i=0; i < n; i++)
+    {
+        memcpy(out + i * 4, row + i * 3, 3);
+        out[i*4+3] = 255;
+    }
+}
+
+static unsigned num_channels(const struct spng_ihdr *ihdr)
+{
+    switch(ihdr->color_type)
+    {
+        case SPNG_COLOR_TYPE_TRUECOLOR: return 3;
+        case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA: return 2;
+        case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA: return 4;
+        case SPNG_COLOR_TYPE_GRAYSCALE:
+        case SPNG_COLOR_TYPE_INDEXED:
+            return 1;
+        default: return 0;
+    }
+}
+
+/* Calculate scanline width in bits, round up to the nearest byte */
+static int calculate_scanline_width(const struct spng_ihdr *ihdr, uint32_t width, size_t *scanline_width)
+{
+    if(ihdr == NULL || !width) return SPNG_EINTERNAL;
+
+    size_t res = num_channels(ihdr) * ihdr->bit_depth;
+
+    if(res > SIZE_MAX / width) return SPNG_EOVERFLOW;
+    res = res * width;
+
+    res += 15; /* Filter byte + 7 for rounding */
+
+    if(res < 15) return SPNG_EOVERFLOW;
+
+    res /= 8;
+
+    if(res > UINT32_MAX) return SPNG_EOVERFLOW;
+
+    *scanline_width = res;
+
+    return 0;
+}
+
+static int calculate_subimages(struct spng_ctx *ctx)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+
+    struct spng_ihdr *ihdr = &ctx->ihdr;
+    struct spng_subimage *sub = ctx->subimage;
+
+    if(ihdr->interlace_method == 1)
+    {
+        sub[0].width = (ihdr->width + 7) >> 3;
+        sub[0].height = (ihdr->height + 7) >> 3;
+        sub[1].width = (ihdr->width + 3) >> 3;
+        sub[1].height = (ihdr->height + 7) >> 3;
+        sub[2].width = (ihdr->width + 3) >> 2;
+        sub[2].height = (ihdr->height + 3) >> 3;
+        sub[3].width = (ihdr->width + 1) >> 2;
+        sub[3].height = (ihdr->height + 3) >> 2;
+        sub[4].width = (ihdr->width + 1) >> 1;
+        sub[4].height = (ihdr->height + 1) >> 2;
+        sub[5].width = ihdr->width >> 1;
+        sub[5].height = (ihdr->height + 1) >> 1;
+        sub[6].width = ihdr->width;
+        sub[6].height = ihdr->height >> 1;
+    }
+    else
+    {
+        sub[0].width = ihdr->width;
+        sub[0].height = ihdr->height;
+    }
+
+    int i;
+    for(i=0; i < 7; i++)
+    {
+        if(sub[i].width == 0 || sub[i].height == 0) continue;
+
+        int ret = calculate_scanline_width(ihdr, sub[i].width, &sub[i].scanline_width);
+        if(ret) return ret;
+
+        if(sub[ctx->widest_pass].scanline_width < sub[i].scanline_width) ctx->widest_pass = i;
+
+        ctx->last_pass = i;
+    }
+
+    return 0;
+}
+
+static int check_decode_fmt(const struct spng_ihdr *ihdr, const int fmt)
+{
+    switch(fmt)
+    {
+        case SPNG_FMT_RGBA8:
+        case SPNG_FMT_RGBA16:
+        case SPNG_FMT_RGB8:
+        case SPNG_FMT_PNG:
+        case SPNG_FMT_RAW:
+            return 0;
+        case SPNG_FMT_G8:
+        case SPNG_FMT_GA8:
+            if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8) return 0;
+            else return SPNG_EFMT;
+        case SPNG_FMT_GA16:
+            if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth == 16) return 0;
+            else return SPNG_EFMT;
+        default: return SPNG_EFMT;
+    }
+}
+
+static int calculate_image_width(const struct spng_ihdr *ihdr, int fmt, size_t *len)
+{
+    if(ihdr == NULL || len == NULL) return SPNG_EINTERNAL;
+
+    size_t res = ihdr->width;
+    unsigned bytes_per_pixel;
+
+    switch(fmt)
+    {
+        case SPNG_FMT_RGBA8:
+        case SPNG_FMT_GA16:
+            bytes_per_pixel = 4;
+            break;
+        case SPNG_FMT_RGBA16:
+            bytes_per_pixel = 8;
+            break;
+        case SPNG_FMT_RGB8:
+            bytes_per_pixel = 3;
+            break;
+        case SPNG_FMT_PNG:
+        case SPNG_FMT_RAW:
+        {
+            int ret = calculate_scanline_width(ihdr, ihdr->width, &res);
+            if(ret) return ret;
+
+            res -= 1; /* exclude filter byte */
+            bytes_per_pixel = 1;
+            break;
+        }
+        case SPNG_FMT_G8:
+            bytes_per_pixel = 1;
+            break;
+        case SPNG_FMT_GA8:
+            bytes_per_pixel = 2;
+            break;
+        default: return SPNG_EINTERNAL;
+    }
+
+    if(res > SIZE_MAX / bytes_per_pixel) return SPNG_EOVERFLOW;
+    res = res * bytes_per_pixel;
+
+    *len = res;
+
+    return 0;
+}
+
+static int calculate_image_size(const struct spng_ihdr *ihdr, int fmt, size_t *len)
+{
+    if(ihdr == NULL || len == NULL) return SPNG_EINTERNAL;
+
+    size_t res = 0;
+
+    int ret = calculate_image_width(ihdr, fmt, &res);
+    if(ret) return ret;
+
+    if(res > SIZE_MAX / ihdr->height) return SPNG_EOVERFLOW;
+    res = res * ihdr->height;
+
+    *len = res;
+
+    return 0;
+}
+
+static int increase_cache_usage(spng_ctx *ctx, size_t bytes, int new_chunk)
+{
+    if(ctx == NULL || !bytes) return SPNG_EINTERNAL;
+
+    if(new_chunk)
+    {
+        ctx->chunk_count_total++;
+        if(ctx->chunk_count_total < 1) return SPNG_EOVERFLOW;
+
+        if(ctx->chunk_count_total > ctx->chunk_count_limit) return SPNG_ECHUNK_LIMITS;
+    }
+
+    size_t new_usage = ctx->chunk_cache_usage + bytes;
+
+    if(new_usage < ctx->chunk_cache_usage) return SPNG_EOVERFLOW;
+
+    if(new_usage > ctx->chunk_cache_limit) return SPNG_ECHUNK_LIMITS;
+
+    ctx->chunk_cache_usage = new_usage;
+
+    return 0;
+}
+
+static int decrease_cache_usage(spng_ctx *ctx, size_t usage)
+{
+    if(ctx == NULL || !usage) return SPNG_EINTERNAL;
+    if(usage > ctx->chunk_cache_usage) return SPNG_EINTERNAL;
+
+    ctx->chunk_cache_usage -= usage;
+
+    return 0;
+}
+
+static int is_critical_chunk(struct spng_chunk *chunk)
+{
+    if(chunk == NULL) return 0;
+    if((chunk->type[0] & (1 << 5)) == 0) return 1;
+
+    return 0;
+}
+
+static int decode_err(spng_ctx *ctx, int err)
+{
+    ctx->state = SPNG_STATE_INVALID;
+
+    return err;
+}
+
+static int encode_err(spng_ctx *ctx, int err)
+{
+    ctx->state = SPNG_STATE_INVALID;
+
+    return err;
+}
+
+static inline int read_data(spng_ctx *ctx, size_t bytes)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+    if(!bytes) return 0;
+
+    if(ctx->streaming && (bytes > SPNG_READ_SIZE)) return SPNG_EINTERNAL;
+
+    int ret = ctx->read_fn(ctx, ctx->stream_user_ptr, ctx->stream_buf, bytes);
+
+    if(ret)
+    {
+        if(ret > 0 || ret < SPNG_IO_ERROR) ret = SPNG_IO_ERROR;
+
+        return ret;
+    }
+
+    ctx->bytes_read += bytes;
+    if(ctx->bytes_read < bytes) return SPNG_EOVERFLOW;
+
+    return 0;
+}
+
+/* Ensure there is enough space for encoding starting at ctx->write_ptr  */
+static int require_bytes(spng_ctx *ctx, size_t bytes)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+
+    if(ctx->streaming)
+    {
+        if(bytes > ctx->stream_buf_size)
+        {
+            size_t new_size = ctx->stream_buf_size;
+
+            /* Start at default IDAT size + header + crc */
+            if(new_size < (SPNG_WRITE_SIZE + 12)) new_size = SPNG_WRITE_SIZE + 12;
+
+            if(new_size < bytes) new_size = bytes;
+
+            void *temp = spng__realloc(ctx, ctx->stream_buf, new_size);
+
+            if(temp == NULL) return encode_err(ctx, SPNG_EMEM);
+
+            ctx->stream_buf = temp;
+            ctx->stream_buf_size = bytes;
+            ctx->write_ptr = ctx->stream_buf;
+        }
+
+        return 0;
+    }
+
+    if(!ctx->internal_buffer) return SPNG_ENODST;
+
+    size_t required = ctx->bytes_encoded + bytes;
+    if(required < bytes) return SPNG_EOVERFLOW;
+
+    if(required > ctx->out_png_size)
+    {
+        size_t new_size = ctx->out_png_size;
+
+        /* Start with a size that doesn't require a realloc() 100% of the time */
+        if(new_size < (SPNG_WRITE_SIZE * 2)) new_size = SPNG_WRITE_SIZE * 2;
+
+        /* Prefer the next power of two over the requested size */
+        while(new_size < required)
+        {
+            if(new_size / SIZE_MAX > 2) return encode_err(ctx, SPNG_EOVERFLOW);
+
+            new_size *= 2;
+        }
+
+        void *temp = spng__realloc(ctx, ctx->out_png, new_size);
+
+        if(temp == NULL) return encode_err(ctx, SPNG_EMEM);
+
+        ctx->out_png = temp;
+        ctx->out_png_size = new_size;
+        ctx->write_ptr = ctx->out_png + ctx->bytes_encoded;
+    }
+
+    return 0;
+}
+
+static int write_data(spng_ctx *ctx, const void *data, size_t bytes)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+    if(!bytes) return 0;
+
+    if(ctx->streaming)
+    {
+        if(bytes > SPNG_WRITE_SIZE) return SPNG_EINTERNAL;
+
+        int ret = ctx->write_fn(ctx, ctx->stream_user_ptr, (void*)data, bytes);
+
+        if(ret)
+        {
+            if(ret > 0 || ret < SPNG_IO_ERROR) ret = SPNG_IO_ERROR;
+
+            return encode_err(ctx, ret);
+        }
+    }
+    else
+    {
+        int ret = require_bytes(ctx, bytes);
+        if(ret) return encode_err(ctx, ret);
+
+        memcpy(ctx->write_ptr, data, bytes);
+
+        ctx->write_ptr += bytes;
+    }
+
+    ctx->bytes_encoded += bytes;
+    if(ctx->bytes_encoded < bytes) return SPNG_EOVERFLOW;
+
+    return 0;
+}
+
+static int write_header(spng_ctx *ctx, const uint8_t chunk_type[4], size_t chunk_length, unsigned char **data)
+{
+    if(ctx == NULL || chunk_type == NULL) return SPNG_EINTERNAL;
+    if(chunk_length > spng_u32max) return SPNG_EINTERNAL;
+
+    size_t total = chunk_length + 12;
+
+    int ret = require_bytes(ctx, total);
+    if(ret) return ret;
+
+    uint32_t crc = crc32(0, NULL, 0);
+    ctx->current_chunk.crc = crc32(crc, chunk_type, 4);
+
+    memcpy(&ctx->current_chunk.type, chunk_type, 4);
+    ctx->current_chunk.length = (uint32_t)chunk_length;
+
+    if(!data) return SPNG_EINTERNAL;
+
+    if(ctx->streaming) *data = ctx->stream_buf + 8;
+    else *data = ctx->write_ptr + 8;
+
+    return 0;
+}
+
+static int trim_chunk(spng_ctx *ctx, uint32_t length)
+{
+    if(length > spng_u32max) return SPNG_EINTERNAL;
+    if(length > ctx->current_chunk.length) return SPNG_EINTERNAL;
+
+    ctx->current_chunk.length = length;
+
+    return 0;
+}
+
+static int finish_chunk(spng_ctx *ctx)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+
+    struct spng_chunk *chunk = &ctx->current_chunk;
+
+    unsigned char *header;
+    unsigned char *chunk_data;
+
+    if(ctx->streaming)
+    {
+        chunk_data = ctx->stream_buf + 8;
+        header = ctx->stream_buf;
+    }
+    else
+    {
+        chunk_data = ctx->write_ptr + 8;
+        header = ctx->write_ptr;
+    }
+
+    write_u32(header, chunk->length);
+    memcpy(header + 4, chunk->type, 4);
+
+    chunk->crc = crc32(chunk->crc, chunk_data, chunk->length);
+
+    write_u32(chunk_data + chunk->length, chunk->crc);
+
+    if(ctx->streaming)
+    {
+        const unsigned char *ptr = ctx->stream_buf;
+        uint32_t bytes_left = chunk->length + 12;
+        uint32_t len = 0;
+
+        while(bytes_left)
+        {
+            ptr += len;
+            len = SPNG_WRITE_SIZE;
+
+            if(len > bytes_left) len = bytes_left;
+
+            int ret = write_data(ctx, ptr, len);
+            if(ret) return ret;
+
+            bytes_left -= len;
+        }
+    }
+    else
+    {
+        ctx->bytes_encoded += chunk->length;
+        if(ctx->bytes_encoded < chunk->length) return SPNG_EOVERFLOW;
+
+        ctx->bytes_encoded += 12;
+        if(ctx->bytes_encoded < 12) return SPNG_EOVERFLOW;
+
+        ctx->write_ptr += chunk->length + 12;
+    }
+
+    return 0;
+}
+
+static int write_chunk(spng_ctx *ctx, const uint8_t type[4], const void *data, size_t length)
+{
+    if(ctx == NULL || type == NULL) return SPNG_EINTERNAL;
+    if(length && data == NULL) return SPNG_EINTERNAL;
+
+    unsigned char *write_ptr;
+
+    int ret = write_header(ctx, type, length, &write_ptr);
+    if(ret) return ret;
+
+    if(length) memcpy(write_ptr, data, length);
+
+    return finish_chunk(ctx);
+}
+
+static int write_iend(spng_ctx *ctx)
+{
+    unsigned char iend_chunk[12] = { 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130 };
+    return write_data(ctx, iend_chunk, 12);
+}
+
+static int write_unknown_chunks(spng_ctx *ctx, enum spng_location location)
+{
+    if(!ctx->stored.unknown) return 0;
+
+    const struct spng_unknown_chunk *chunk = ctx->chunk_list;
+
+    uint32_t i;
+    for(i=0; i < ctx->n_chunks; i++, chunk++)
+    {
+        if(chunk->location != location) continue;
+
+        int ret = write_chunk(ctx, chunk->type, chunk->data, chunk->length);
+        if(ret) return ret;
+    }
+
+    return 0;
+}
+
+/* Read and check the current chunk's crc,
+   returns -SPNG_CRC_DISCARD if the chunk should be discarded */
+static inline int read_and_check_crc(spng_ctx *ctx)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+
+    int ret;
+    ret = read_data(ctx, 4);
+    if(ret) return ret;
+
+    ctx->current_chunk.crc = read_u32(ctx->data);
+
+    if(ctx->skip_crc) return 0;
+
+    if(ctx->cur_actual_crc != ctx->current_chunk.crc)
+    {
+        if(is_critical_chunk(&ctx->current_chunk))
+        {
+            if(ctx->crc_action_critical == SPNG_CRC_USE) return 0;
+        }
+        else
+        {
+            if(ctx->crc_action_ancillary == SPNG_CRC_USE) return 0;
+            if(ctx->crc_action_ancillary == SPNG_CRC_DISCARD) return -SPNG_CRC_DISCARD;
+        }
+
+        return SPNG_ECHUNK_CRC;
+    }
+
+    return 0;
+}
+
+/* Read and validate the current chunk's crc and the next chunk header */
+static inline int read_header(spng_ctx *ctx)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+
+    int ret;
+    struct spng_chunk chunk = { 0 };
+
+    ret = read_and_check_crc(ctx);
+    if(ret)
+    {
+        if(ret == -SPNG_CRC_DISCARD)
+        {
+            ctx->discard = 1;
+        }
+        else return ret;
+    }
+
+    ret = read_data(ctx, 8);
+    if(ret) return ret;
+
+    chunk.offset = ctx->bytes_read - 8;
+
+    chunk.length = read_u32(ctx->data);
+
+    memcpy(&chunk.type, ctx->data + 4, 4);
+
+    if(chunk.length > spng_u32max) return SPNG_ECHUNK_STDLEN;
+
+    ctx->cur_chunk_bytes_left = chunk.length;
+
+    if(is_critical_chunk(&chunk) && ctx->crc_action_critical == SPNG_CRC_USE) ctx->skip_crc = 1;
+    else if(ctx->crc_action_ancillary == SPNG_CRC_USE) ctx->skip_crc = 1;
+    else ctx->skip_crc = 0;
+
+    if(!ctx->skip_crc)
+    {
+        ctx->cur_actual_crc = crc32(0, NULL, 0);
+        ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, chunk.type, 4);
+    }
+
+    ctx->current_chunk = chunk;
+
+    return 0;
+}
+
+/* Read chunk bytes and update crc */
+static int read_chunk_bytes(spng_ctx *ctx, uint32_t bytes)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+    if(!ctx->cur_chunk_bytes_left || !bytes) return SPNG_EINTERNAL;
+    if(bytes > ctx->cur_chunk_bytes_left) return SPNG_EINTERNAL; /* XXX: more specific error? */
+
+    int ret;
+
+    ret = read_data(ctx, bytes);
+    if(ret) return ret;
+
+    if(!ctx->skip_crc) ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, ctx->data, bytes);
+
+    ctx->cur_chunk_bytes_left -= bytes;
+
+    return ret;
+}
+
+/* read_chunk_bytes() + read_data() with custom output buffer */
+static int read_chunk_bytes2(spng_ctx *ctx, void *out, uint32_t bytes)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+    if(!ctx->cur_chunk_bytes_left || !bytes) return SPNG_EINTERNAL;
+    if(bytes > ctx->cur_chunk_bytes_left) return SPNG_EINTERNAL; /* XXX: more specific error? */
+
+    int ret;
+    uint32_t len = bytes;
+
+    if(ctx->streaming && len > SPNG_READ_SIZE) len = SPNG_READ_SIZE;
+
+    while(bytes)
+    {
+        if(len > bytes) len = bytes;
+
+        ret = ctx->read_fn(ctx, ctx->stream_user_ptr, out, len);
+        if(ret) return ret;
+
+        if(!ctx->streaming) memcpy(out, ctx->data, len);
+
+        ctx->bytes_read += len;
+        if(ctx->bytes_read < len) return SPNG_EOVERFLOW;
+
+        if(!ctx->skip_crc) ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, out, len);
+
+        ctx->cur_chunk_bytes_left -= len;
+
+        out = (char*)out + len;
+        bytes -= len;
+        len = SPNG_READ_SIZE;
+    }
+
+    return 0;
+}
+
+static int discard_chunk_bytes(spng_ctx *ctx, uint32_t bytes)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+    if(!bytes) return 0;
+
+    int ret;
+
+    if(ctx->streaming) /* Do small, consecutive reads */
+    {
+        while(bytes)
+        {
+            uint32_t len = SPNG_READ_SIZE;
+
+            if(len > bytes) len = bytes;
+
+            ret = read_chunk_bytes(ctx, len);
+            if(ret) return ret;
+
+            bytes -= len;
+        }
+    }
+    else
+    {
+        ret = read_chunk_bytes(ctx, bytes);
+        if(ret) return ret;
+    }
+
+    return 0;
+}
+
+static int spng__inflate_init(spng_ctx *ctx, int window_bits)
+{
+    if(ctx->zstream.state) inflateEnd(&ctx->zstream);
+
+    ctx->inflate = 1;
+
+    ctx->zstream.zalloc = spng__zalloc;
+    ctx->zstream.zfree = spng__zfree;
+    ctx->zstream.opaque = ctx;
+
+    if(inflateInit2(&ctx->zstream, window_bits) != Z_OK) return SPNG_EZLIB_INIT;
+
+#if ZLIB_VERNUM >= 0x1290 && !defined(SPNG_USE_MINIZ)
+
+    int validate = 1;
+
+    if(ctx->flags & SPNG_CTX_IGNORE_ADLER32) validate = 0;
+
+    if(is_critical_chunk(&ctx->current_chunk))
+    {
+        if(ctx->crc_action_critical == SPNG_CRC_USE) validate = 0;
+    }
+    else /* ancillary */
+    {
+        if(ctx->crc_action_ancillary == SPNG_CRC_USE) validate = 0;
+    }
+
+    if(inflateValidate(&ctx->zstream, validate)) return SPNG_EZLIB_INIT;
+
+#else /* This requires zlib >= 1.2.11 */
+    #pragma message ("inflateValidate() not available, SPNG_CTX_IGNORE_ADLER32 will be ignored")
+#endif
+
+    return 0;
+}
+
+static int spng__deflate_init(spng_ctx *ctx, struct spng__zlib_options *options)
+{
+    if(ctx->zstream.state) deflateEnd(&ctx->zstream);
+
+    ctx->deflate = 1;
+
+    z_stream *zstream = &ctx->zstream;
+    zstream->zalloc = spng__zalloc;
+    zstream->zfree = spng__zfree;
+    zstream->opaque = ctx;
+    zstream->data_type = options->data_type;
+
+    int ret = deflateInit2(zstream, options->compression_level, Z_DEFLATED, options->window_bits, options->mem_level, options->strategy);
+
+    if(ret != Z_OK) return SPNG_EZLIB_INIT;
+
+    return 0;
+}
+
+/* Inflate a zlib stream starting with start_buf if non-NULL,
+   continuing from the datastream till an end marker,
+   allocating and writing the inflated stream to *out,
+   leaving "extra" bytes at the end, final buffer length is *len.
+
+   Takes into account the chunk size and cache limits.
+*/
+static int spng__inflate_stream(spng_ctx *ctx, char **out, size_t *len, size_t extra, const void *start_buf, size_t start_len)
+{
+    int ret = spng__inflate_init(ctx, 15);
+    if(ret) return ret;
+
+    size_t max = ctx->chunk_cache_limit - ctx->chunk_cache_usage;
+
+    if(ctx->max_chunk_size < max) max = ctx->max_chunk_size;
+
+    if(extra > max) return SPNG_ECHUNK_LIMITS;
+    max -= extra;
+
+    uint32_t read_size;
+    size_t size = 8 * 1024;
+    void *t, *buf = spng__malloc(ctx, size);
+
+    if(buf == NULL) return SPNG_EMEM;
+
+    z_stream *stream = &ctx->zstream;
+
+    if(start_buf != NULL && start_len)
+    {
+        stream->avail_in = (uInt)start_len;
+        stream->next_in = start_buf;
+    }
+    else
+    {
+        stream->avail_in = 0;
+        stream->next_in = NULL;
+    }
+
+    stream->avail_out = (uInt)size;
+    stream->next_out = buf;
+
+    while(ret != Z_STREAM_END)
+    {
+        ret = inflate(stream, Z_NO_FLUSH);
+
+        if(ret == Z_STREAM_END) break;
+
+        if(ret != Z_OK && ret != Z_BUF_ERROR)
+        {
+            ret = SPNG_EZLIB;
+            goto err;
+        }
+
+        if(!stream->avail_out) /* Resize buffer */
+        {
+            /* overflow or reached chunk/cache limit */
+            if( (2 > SIZE_MAX / size) || (size > max / 2) )
+            {
+                ret = SPNG_ECHUNK_LIMITS;
+                goto err;
+            }
+
+            size *= 2;
+
+            t = spng__realloc(ctx, buf, size);
+            if(t == NULL) goto mem;
+
+            buf = t;
+
+            stream->avail_out = (uInt)size / 2;
+            stream->next_out = (unsigned char*)buf + size / 2;
+        }
+        else if(!stream->avail_in) /* Read more chunk bytes */
+        {
+            read_size = ctx->cur_chunk_bytes_left;
+            if(ctx->streaming && read_size > SPNG_READ_SIZE) read_size = SPNG_READ_SIZE;
+
+            ret = read_chunk_bytes(ctx, read_size);
+
+            if(ret)
+            {
+                if(!read_size) ret = SPNG_EZLIB;
+
+                goto err;
+            }
+
+            stream->avail_in = read_size;
+            stream->next_in = ctx->data;
+        }
+    }
+
+    size = stream->total_out;
+
+    if(!size)
+    {
+        ret = SPNG_EZLIB;
+        goto err;
+    }
+
+    size += extra;
+    if(size < extra) goto mem;
+
+    t = spng__realloc(ctx, buf, size);
+    if(t == NULL) goto mem;
+
+    buf = t;
+
+    (void)increase_cache_usage(ctx, size, 0);
+
+    *out = buf;
+    *len = size;
+
+    return 0;
+
+mem:
+    ret = SPNG_EMEM;
+err:
+    spng__free(ctx, buf);
+    return ret;
+}
+
+/* Read at least one byte from the IDAT stream */
+static int read_idat_bytes(spng_ctx *ctx, uint32_t *bytes_read)
+{
+    if(ctx == NULL || bytes_read == NULL) return SPNG_EINTERNAL;
+    if(memcmp(ctx->current_chunk.type, type_idat, 4)) return SPNG_EIDAT_TOO_SHORT;
+
+    int ret;
+    uint32_t len;
+
+    while(!ctx->cur_chunk_bytes_left)
+    {
+        ret = read_header(ctx);
+        if(ret) return ret;
+
+        if(memcmp(ctx->current_chunk.type, type_idat, 4)) return SPNG_EIDAT_TOO_SHORT;
+    }
+
+    if(ctx->streaming)
+    {/* TODO: estimate bytes to read for progressive reads */
+        len = SPNG_READ_SIZE;
+        if(len > ctx->cur_chunk_bytes_left) len = ctx->cur_chunk_bytes_left;
+    }
+    else len = ctx->current_chunk.length;
+
+    ret = read_chunk_bytes(ctx, len);
+
+    *bytes_read = len;
+
+    return ret;
+}
+
+static int read_scanline_bytes(spng_ctx *ctx, unsigned char *dest, size_t len)
+{
+    if(ctx == NULL || dest == NULL) return SPNG_EINTERNAL;
+
+    int ret = Z_OK;
+    uint32_t bytes_read;
+
+    z_stream *zstream = &ctx->zstream;
+
+    zstream->avail_out = (uInt)len;
+    zstream->next_out = dest;
+
+    while(zstream->avail_out != 0)
+    {
+        ret = inflate(zstream, Z_NO_FLUSH);
+
+        if(ret == Z_OK) continue;
+
+        if(ret == Z_STREAM_END) /* Reached an end-marker */
+        {
+            if(zstream->avail_out != 0) return SPNG_EIDAT_TOO_SHORT;
+        }
+        else if(ret == Z_BUF_ERROR) /* Read more IDAT bytes */
+        {
+            ret = read_idat_bytes(ctx, &bytes_read);
+            if(ret) return ret;
+
+            zstream->avail_in = bytes_read;
+            zstream->next_in = ctx->data;
+        }
+        else return SPNG_EIDAT_STREAM;
+    }
+
+    return 0;
+}
+
+static uint8_t paeth(uint8_t a, uint8_t b, uint8_t c)
+{
+    int16_t p = a + b - c;
+    int16_t pa = abs(p - a);
+    int16_t pb = abs(p - b);
+    int16_t pc = abs(p - c);
+
+    if(pa <= pb && pa <= pc) return a;
+    else if(pb <= pc) return b;
+
+    return c;
+}
+
+SPNG_TARGET_CLONES("default,avx2")
+static void defilter_up(size_t bytes, unsigned char *row, const unsigned char *prev)
+{
+    size_t i;
+    for(i=0; i < bytes; i++)
+    {
+        row[i] += prev[i];
+    }
+}
+
+/* Defilter *scanline in-place.
+   *prev_scanline and *scanline should point to the first pixel,
+   scanline_width is the width of the scanline including the filter byte.
+*/
+static int defilter_scanline(const unsigned char *prev_scanline, unsigned char *scanline,
+                             size_t scanline_width, unsigned bytes_per_pixel, unsigned filter)
+{
+    if(prev_scanline == NULL || scanline == NULL || !scanline_width) return SPNG_EINTERNAL;
+
+    size_t i;
+    scanline_width--;
+
+    if(filter == 0) return 0;
+
+#ifndef SPNG_DISABLE_OPT
+    if(filter == SPNG_FILTER_UP) goto no_opt;
+
+    if(bytes_per_pixel == 4)
+    {
+        if(filter == SPNG_FILTER_SUB)
+            defilter_sub4(scanline_width, scanline);
+        else if(filter == SPNG_FILTER_AVERAGE)
+            defilter_avg4(scanline_width, scanline, prev_scanline);
+        else if(filter == SPNG_FILTER_PAETH)
+            defilter_paeth4(scanline_width, scanline, prev_scanline);
+        else return SPNG_EFILTER;
+
+        return 0;
+    }
+    else if(bytes_per_pixel == 3)
+    {
+        if(filter == SPNG_FILTER_SUB)
+            defilter_sub3(scanline_width, scanline);
+        else if(filter == SPNG_FILTER_AVERAGE)
+            defilter_avg3(scanline_width, scanline, prev_scanline);
+        else if(filter == SPNG_FILTER_PAETH)
+            defilter_paeth3(scanline_width, scanline, prev_scanline);
+        else return SPNG_EFILTER;
+
+        return 0;
+    }
+no_opt:
+#endif
+
+    if(filter == SPNG_FILTER_UP)
+    {
+        defilter_up(scanline_width, scanline, prev_scanline);
+        return 0;
+    }
+
+    for(i=0; i < scanline_width; i++)
+    {
+        uint8_t x, a, b, c;
+
+        if(i >= bytes_per_pixel)
+        {
+            a = scanline[i - bytes_per_pixel];
+            b = prev_scanline[i];
+            c = prev_scanline[i - bytes_per_pixel];
+        }
+        else /* First pixel in row */
+        {
+            a = 0;
+            b = prev_scanline[i];
+            c = 0;
+        }
+
+        x = scanline[i];
+
+        switch(filter)
+        {
+            case SPNG_FILTER_SUB:
+            {
+                x = x + a;
+                break;
+            }
+            case SPNG_FILTER_AVERAGE:
+            {
+                uint16_t avg = (a + b) / 2;
+                x = x + avg;
+                break;
+            }
+            case SPNG_FILTER_PAETH:
+            {
+                x = x + paeth(a,b,c);
+                break;
+            }
+        }
+
+        scanline[i] = x;
+    }
+
+    return 0;
+}
+
+static int filter_scanline(unsigned char *filtered, const unsigned char *prev_scanline, const unsigned char *scanline,
+                           size_t scanline_width, unsigned bytes_per_pixel, const unsigned filter)
+{
+    if(prev_scanline == NULL || scanline == NULL || scanline_width <= 1) return SPNG_EINTERNAL;
+
+    if(filter > 4) return SPNG_EFILTER;
+    if(filter == 0) return 0;
+
+    scanline_width--;
+
+    uint32_t i;
+    for(i=0; i < scanline_width; i++)
+    {
+        uint8_t x, a, b, c;
+
+        if(i >= bytes_per_pixel)
+        {
+            a = scanline[i - bytes_per_pixel];
+            b = prev_scanline[i];
+            c = prev_scanline[i - bytes_per_pixel];
+        }
+        else /* first pixel in row */
+        {
+            a = 0;
+            b = prev_scanline[i];
+            c = 0;
+        }
+
+        x = scanline[i];
+
+        switch(filter)
+        {
+            case SPNG_FILTER_SUB:
+            {
+                x = x - a;
+                break;
+            }
+            case SPNG_FILTER_UP:
+            {
+                x = x - b;
+                break;
+            }
+            case SPNG_FILTER_AVERAGE:
+            {
+                uint16_t avg = (a + b) / 2;
+                x = x - avg;
+                break;
+            }
+            case SPNG_FILTER_PAETH:
+            {
+                x = x - paeth(a,b,c);
+                break;
+            }
+        }
+
+        filtered[i] = x;
+    }
+
+    return 0;
+}
+
+static int32_t filter_sum(const unsigned char *prev_scanline, const unsigned char *scanline,
+                          size_t size, unsigned bytes_per_pixel, const unsigned filter)
+{
+    /* prevent potential over/underflow, bails out at a width of ~8M pixels for RGBA8 */
+    if(size > (INT32_MAX / 128)) return INT32_MAX;
+
+    uint32_t i;
+    int32_t sum = 0;
+    uint8_t x, a, b, c;
+
+    for(i=0; i < size; i++)
+    {
+        if(i >= bytes_per_pixel)
+        {
+            a = scanline[i - bytes_per_pixel];
+            b = prev_scanline[i];
+            c = prev_scanline[i - bytes_per_pixel];
+        }
+        else /* first pixel in row */
+        {
+            a = 0;
+            b = prev_scanline[i];
+            c = 0;
+        }
+
+        x = scanline[i];
+
+        switch(filter)
+        {
+            case SPNG_FILTER_NONE:
+            {
+                break;
+            }
+            case SPNG_FILTER_SUB:
+            {
+                x = x - a;
+                break;
+            }
+            case SPNG_FILTER_UP:
+            {
+                x = x - b;
+                break;
+            }
+            case SPNG_FILTER_AVERAGE:
+            {
+                uint16_t avg = (a + b) / 2;
+                x = x - avg;
+                break;
+            }
+            case SPNG_FILTER_PAETH:
+            {
+                x = x - paeth(a,b,c);
+                break;
+            }
+        }
+
+        sum += 128 - abs((int)x - 128);
+    }
+
+    return sum;
+}
+
+static unsigned get_best_filter(const unsigned char *prev_scanline, const unsigned char *scanline,
+                                size_t scanline_width, unsigned bytes_per_pixel, const int choices)
+{
+    if(!choices) return SPNG_FILTER_NONE;
+
+    scanline_width--;
+
+    int i;
+    unsigned int best_filter = 0;
+    enum spng_filter_choice flag;
+    int32_t sum, best_score = INT32_MAX;
+    int32_t filter_scores[5] = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX };
+
+    if( !(choices & (choices - 1)) )
+    {/* only one choice/bit is set */
+        for(i=0; i < 5; i++)
+        {
+            if(choices == 1 << (i + 3)) return i;
+        }
+    }
+
+    for(i=0; i < 5; i++)
+    {
+        flag = 1 << (i + 3);
+
+        if(choices & flag) sum = filter_sum(prev_scanline, scanline, scanline_width, bytes_per_pixel, i);
+        else continue;
+
+        filter_scores[i] = abs(sum);
+
+        if(filter_scores[i] < best_score)
+        {
+            best_score = filter_scores[i];
+            best_filter = i;
+        }
+    }
+
+    return best_filter;
+}
+
+/* Scale "sbits" significant bits in "sample" from "bit_depth" to "target"
+
+   "bit_depth" must be a valid PNG depth
+   "sbits" must be less than or equal to "bit_depth"
+   "target" must be between 1 and 16
+*/
+static uint16_t sample_to_target(uint16_t sample, unsigned bit_depth, unsigned sbits, unsigned target)
+{
+    if(bit_depth == sbits)
+    {
+        if(target == sbits) return sample; /* No scaling */
+    }/* bit_depth > sbits */
+    else sample = sample >> (bit_depth - sbits); /* Shift significant bits to bottom */
+
+    /* Downscale */
+    if(target < sbits) return sample >> (sbits - target);
+
+    /* Upscale using left bit replication */
+    int8_t shift_amount = target - sbits;
+    uint16_t sample_bits = sample;
+    sample = 0;
+
+    while(shift_amount >= 0)
+    {
+        sample = sample | (sample_bits << shift_amount);
+        shift_amount -= sbits;
+    }
+
+    int8_t partial = shift_amount + (int8_t)sbits;
+
+    if(partial != 0) sample = sample | (sample_bits >> abs(shift_amount));
+
+    return sample;
+}
+
+static inline void gamma_correct_row(unsigned char *row, uint32_t pixels, int fmt, const uint16_t *gamma_lut)
+{
+    uint32_t i;
+
+    if(fmt == SPNG_FMT_RGBA8)
+    {
+        unsigned char *px;
+        for(i=0; i < pixels; i++)
+        {
+            px = row + i * 4;
+
+            px[0] = gamma_lut[px[0]];
+            px[1] = gamma_lut[px[1]];
+            px[2] = gamma_lut[px[2]];
+        }
+    }
+    else if(fmt == SPNG_FMT_RGBA16)
+    {
+        for(i=0; i < pixels; i++)
+        {
+            uint16_t px[4];
+            memcpy(px, row + i * 8, 8);
+
+            px[0] = gamma_lut[px[0]];
+            px[1] = gamma_lut[px[1]];
+            px[2] = gamma_lut[px[2]];
+
+            memcpy(row + i * 8, px, 8);
+        }
+    }
+    else if(fmt == SPNG_FMT_RGB8)
+    {
+        unsigned char *px;
+        for(i=0; i < pixels; i++)
+        {
+            px = row + i * 3;
+
+            px[0] = gamma_lut[px[0]];
+            px[1] = gamma_lut[px[1]];
+            px[2] = gamma_lut[px[2]];
+        }
+    }
+}
+
+/* Apply transparency to output row */
+static inline void trns_row(unsigned char *row,
+                            const unsigned char *scanline,
+                            const unsigned char *trns,
+                            unsigned scanline_stride,
+                            struct spng_ihdr *ihdr,
+                            uint32_t pixels,
+                            int fmt)
+{
+    uint32_t i;
+    unsigned row_stride;
+    unsigned depth = ihdr->bit_depth;
+
+    if(fmt == SPNG_FMT_RGBA8)
+    {
+        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) return; /* already applied in the decoding loop */
+
+        row_stride = 4;
+        for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride)
+        {
+            if(!memcmp(scanline, trns, scanline_stride)) row[3] = 0;
+        }
+    }
+    else if(fmt == SPNG_FMT_RGBA16)
+    {
+        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) return; /* already applied in the decoding loop */
+
+        row_stride = 8;
+        for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride)
+        {
+            if(!memcmp(scanline, trns, scanline_stride)) memset(row + 6, 0, 2);
+        }
+    }
+    else if(fmt == SPNG_FMT_GA8)
+    {
+        row_stride = 2;
+
+        if(depth == 16)
+        {
+            for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride)
+            {
+                if(!memcmp(scanline, trns, scanline_stride)) memset(row + 1, 0, 1);
+            }
+        }
+        else /* depth <= 8 */
+        {
+            struct spng__iter iter = spng__iter_init(depth, scanline);
+
+            for(i=0; i < pixels; i++, row+=row_stride)
+            {
+                if(trns[0] == get_sample(&iter)) row[1] = 0;
+            }
+        }
+    }
+    else if(fmt == SPNG_FMT_GA16)
+    {
+        row_stride = 4;
+
+        if(depth == 16)
+        {
+            for(i=0; i< pixels; i++, scanline+=scanline_stride, row+=row_stride)
+            {
+                if(!memcmp(scanline, trns, 2)) memset(row + 2, 0, 2);
+            }
+        }
+        else
+        {
+            struct spng__iter iter = spng__iter_init(depth, scanline);
+
+            for(i=0; i< pixels; i++, row+=row_stride)
+            {
+                if(trns[0] == get_sample(&iter)) memset(row + 2, 0, 2);
+            }
+        }
+    }
+    else return;
+}
+
+static inline void scale_row(unsigned char *row, uint32_t pixels, int fmt, unsigned depth, const struct spng_sbit *sbit)
+{
+    uint32_t i;
+
+    if(fmt == SPNG_FMT_RGBA8)
+    {
+        unsigned char px[4];
+        for(i=0; i < pixels; i++)
+        {
+            memcpy(px, row + i * 4, 4);
+
+            px[0] = sample_to_target(px[0], depth, sbit->red_bits, 8);
+            px[1] = sample_to_target(px[1], depth, sbit->green_bits, 8);
+            px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 8);
+            px[3] = sample_to_target(px[3], depth, sbit->alpha_bits, 8);
+
+            memcpy(row + i * 4, px, 4);
+        }
+    }
+    else if(fmt == SPNG_FMT_RGBA16)
+    {
+        uint16_t px[4];
+        for(i=0; i < pixels; i++)
+        {
+            memcpy(px, row + i * 8, 8);
+
+            px[0] = sample_to_target(px[0], depth, sbit->red_bits, 16);
+            px[1] = sample_to_target(px[1], depth, sbit->green_bits, 16);
+            px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 16);
+            px[3] = sample_to_target(px[3], depth, sbit->alpha_bits, 16);
+
+            memcpy(row + i * 8, px, 8);
+        }
+    }
+    else if(fmt == SPNG_FMT_RGB8)
+    {
+        unsigned char px[4];
+        for(i=0; i < pixels; i++)
+        {
+            memcpy(px, row + i * 3, 3);
+
+            px[0] = sample_to_target(px[0], depth, sbit->red_bits, 8);
+            px[1] = sample_to_target(px[1], depth, sbit->green_bits, 8);
+            px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 8);
+
+            memcpy(row + i * 3, px, 3);
+        }
+    }
+    else if(fmt == SPNG_FMT_G8)
+    {
+        for(i=0; i < pixels; i++)
+        {
+            row[i] = sample_to_target(row[i], depth, sbit->grayscale_bits, 8);
+        }
+    }
+    else if(fmt == SPNG_FMT_GA8)
+    {
+        for(i=0; i < pixels; i++)
+        {
+            row[i*2] = sample_to_target(row[i*2], depth, sbit->grayscale_bits, 8);
+        }
+    }
+}
+
+/* Expand to *row using 8-bit palette indices from *scanline */
+static void expand_row(unsigned char *row,
+                       const unsigned char *scanline,
+                       const union spng__decode_plte *decode_plte,
+                       uint32_t width,
+                       int fmt)
+{
+    uint32_t i = 0;
+    unsigned char *px;
+    unsigned char entry;
+    const struct spng_plte_entry *plte = decode_plte->rgba;
+
+#if defined(SPNG_ARM)
+    if(fmt == SPNG_FMT_RGBA8) i = expand_palette_rgba8_neon(row, scanline, decode_plte->raw, width);
+    else if(fmt == SPNG_FMT_RGB8)
+    {
+        i = expand_palette_rgb8_neon(row, scanline, decode_plte->raw, width);
+
+        for(; i < width; i++)
+        {/* In this case the LUT is 3 bytes packed */
+            px = row + i * 3;
+            entry = scanline[i];
+            px[0] = decode_plte->raw[entry * 3 + 0];
+            px[1] = decode_plte->raw[entry * 3 + 1];
+            px[2] = decode_plte->raw[entry * 3 + 2];
+        }
+        return;
+    }
+#endif
+
+    if(fmt == SPNG_FMT_RGBA8)
+    {
+        for(; i < width; i++)
+        {
+            px = row + i * 4;
+            entry = scanline[i];
+            px[0] = plte[entry].red;
+            px[1] = plte[entry].green;
+            px[2] = plte[entry].blue;
+            px[3] = plte[entry].alpha;
+        }
+    }
+    else if(fmt == SPNG_FMT_RGB8)
+    {
+        for(; i < width; i++)
+        {
+            px = row + i * 3;
+            entry = scanline[i];
+            px[0] = plte[entry].red;
+            px[1] = plte[entry].green;
+            px[2] = plte[entry].blue;
+        }
+    }
+}
+
+/* Unpack 1/2/4/8-bit samples to G8/GA8/GA16 or G16 -> GA16 */
+static void unpack_scanline(unsigned char *out, const unsigned char *scanline, uint32_t width, unsigned bit_depth, int fmt)
+{
+    struct spng__iter iter = spng__iter_init(bit_depth, scanline);
+    uint32_t i;
+    uint16_t sample, alpha = 65535;
+
+
+    if(fmt == SPNG_FMT_GA8) goto ga8;
+    else if(fmt == SPNG_FMT_GA16) goto ga16;
+
+    /* 1/2/4-bit -> 8-bit */
+    for(i=0; i < width; i++) out[i] = get_sample(&iter);
+
+    return;
+
+ga8:
+    /* 1/2/4/8-bit -> GA8 */
+    for(i=0; i < width; i++)
+    {
+        out[i*2] = get_sample(&iter);
+        out[i*2 + 1] = 255;
+    }
+
+    return;
+
+ga16:
+
+    /* 16 -> GA16 */
+    if(bit_depth == 16)
+    {
+        for(i=0; i < width; i++)
+        {
+            memcpy(out + i * 4, scanline + i * 2, 2);
+            memcpy(out + i * 4 + 2, &alpha, 2);
+        }
+        return;
+    }
+
+     /* 1/2/4/8-bit -> GA16 */
+    for(i=0; i < width; i++)
+    {
+        sample = get_sample(&iter);
+        memcpy(out + i * 4, &sample, 2);
+        memcpy(out + i * 4 + 2, &alpha, 2);
+    }
+}
+
+static int check_ihdr(const struct spng_ihdr *ihdr, uint32_t max_width, uint32_t max_height)
+{
+    if(ihdr->width > spng_u32max || !ihdr->width) return SPNG_EWIDTH;
+    if(ihdr->height > spng_u32max || !ihdr->height) return SPNG_EHEIGHT;
+
+    if(ihdr->width > max_width) return SPNG_EUSER_WIDTH;
+    if(ihdr->height > max_height) return SPNG_EUSER_HEIGHT;
+
+    switch(ihdr->color_type)
+    {
+        case SPNG_COLOR_TYPE_GRAYSCALE:
+        {
+            if( !(ihdr->bit_depth == 1 || ihdr->bit_depth == 2 ||
+                  ihdr->bit_depth == 4 || ihdr->bit_depth == 8 ||
+                  ihdr->bit_depth == 16) )
+                  return SPNG_EBIT_DEPTH;
+
+            break;
+        }
+        case SPNG_COLOR_TYPE_TRUECOLOR:
+        case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA:
+        case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA:
+        {
+            if( !(ihdr->bit_depth == 8 || ihdr->bit_depth == 16) )
+                return SPNG_EBIT_DEPTH;
+
+            break;
+        }
+        case SPNG_COLOR_TYPE_INDEXED:
+        {
+            if( !(ihdr->bit_depth == 1 || ihdr->bit_depth == 2 ||
+                  ihdr->bit_depth == 4 || ihdr->bit_depth == 8) )
+                return SPNG_EBIT_DEPTH;
+
+            break;
+        }
+        default: return SPNG_ECOLOR_TYPE;
+    }
+
+    if(ihdr->compression_method) return SPNG_ECOMPRESSION_METHOD;
+    if(ihdr->filter_method) return SPNG_EFILTER_METHOD;
+
+    if(ihdr->interlace_method > 1) return SPNG_EINTERLACE_METHOD;
+
+    return 0;
+}
+
+static int check_plte(const struct spng_plte *plte, const struct spng_ihdr *ihdr)
+{
+    if(plte == NULL || ihdr == NULL) return 1;
+
+    if(plte->n_entries == 0) return 1;
+    if(plte->n_entries > 256) return 1;
+
+    if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED)
+    {
+        if(plte->n_entries > (1U << ihdr->bit_depth)) return 1;
+    }
+
+    return 0;
+}
+
+static int check_sbit(const struct spng_sbit *sbit, const struct spng_ihdr *ihdr)
+{
+    if(sbit == NULL || ihdr == NULL) return 1;
+
+    if(ihdr->color_type == 0)
+    {
+        if(sbit->grayscale_bits == 0) return SPNG_ESBIT;
+        if(sbit->grayscale_bits > ihdr->bit_depth) return SPNG_ESBIT;
+    }
+    else if(ihdr->color_type == 2 || ihdr->color_type == 3)
+    {
+        if(sbit->red_bits == 0) return SPNG_ESBIT;
+        if(sbit->green_bits == 0) return SPNG_ESBIT;
+        if(sbit->blue_bits == 0) return SPNG_ESBIT;
+
+        uint8_t bit_depth;
+        if(ihdr->color_type == 3) bit_depth = 8;
+        else bit_depth = ihdr->bit_depth;
+
+        if(sbit->red_bits > bit_depth) return SPNG_ESBIT;
+        if(sbit->green_bits > bit_depth) return SPNG_ESBIT;
+        if(sbit->blue_bits > bit_depth) return SPNG_ESBIT;
+    }
+    else if(ihdr->color_type == 4)
+    {
+        if(sbit->grayscale_bits == 0) return SPNG_ESBIT;
+        if(sbit->alpha_bits == 0) return SPNG_ESBIT;
+
+        if(sbit->grayscale_bits > ihdr->bit_depth) return SPNG_ESBIT;
+        if(sbit->alpha_bits > ihdr->bit_depth) return SPNG_ESBIT;
+    }
+    else if(ihdr->color_type == 6)
+    {
+        if(sbit->red_bits == 0) return SPNG_ESBIT;
+        if(sbit->green_bits == 0) return SPNG_ESBIT;
+        if(sbit->blue_bits == 0) return SPNG_ESBIT;
+        if(sbit->alpha_bits == 0) return SPNG_ESBIT;
+
+        if(sbit->red_bits > ihdr->bit_depth) return SPNG_ESBIT;
+        if(sbit->green_bits > ihdr->bit_depth) return SPNG_ESBIT;
+        if(sbit->blue_bits > ihdr->bit_depth) return SPNG_ESBIT;
+        if(sbit->alpha_bits > ihdr->bit_depth) return SPNG_ESBIT;
+    }
+
+    return 0;
+}
+
+static int check_chrm_int(const struct spng_chrm_int *chrm_int)
+{
+    if(chrm_int == NULL) return 1;
+
+    if(chrm_int->white_point_x > spng_u32max ||
+       chrm_int->white_point_y > spng_u32max ||
+       chrm_int->red_x > spng_u32max ||
+       chrm_int->red_y > spng_u32max ||
+       chrm_int->green_x  > spng_u32max ||
+       chrm_int->green_y  > spng_u32max ||
+       chrm_int->blue_x > spng_u32max ||
+       chrm_int->blue_y > spng_u32max) return SPNG_ECHRM;
+
+    return 0;
+}
+
+static int check_phys(const struct spng_phys *phys)
+{
+    if(phys == NULL) return 1;
+
+    if(phys->unit_specifier > 1) return SPNG_EPHYS;
+
+    if(phys->ppu_x > spng_u32max) return SPNG_EPHYS;
+    if(phys->ppu_y > spng_u32max) return SPNG_EPHYS;
+
+    return 0;
+}
+
+static int check_time(const struct spng_time *time)
+{
+    if(time == NULL) return 1;
+
+    if(time->month == 0 || time->month > 12) return 1;
+    if(time->day == 0 || time->day > 31) return 1;
+    if(time->hour > 23) return 1;
+    if(time->minute > 59) return 1;
+    if(time->second > 60) return 1;
+
+    return 0;
+}
+
+static int check_offs(const struct spng_offs *offs)
+{
+    if(offs == NULL) return 1;
+
+    if(offs->unit_specifier > 1) return 1;
+
+    return 0;
+}
+
+static int check_exif(const struct spng_exif *exif)
+{
+    if(exif == NULL) return 1;
+    if(exif->data == NULL) return 1;
+
+    if(exif->length < 4) return SPNG_ECHUNK_SIZE;
+    if(exif->length > spng_u32max) return SPNG_ECHUNK_STDLEN;
+
+    const uint8_t exif_le[4] = { 73, 73, 42, 0 };
+    const uint8_t exif_be[4] = { 77, 77, 0, 42 };
+
+    if(memcmp(exif->data, exif_le, 4) && memcmp(exif->data, exif_be, 4)) return 1;
+
+    return 0;
+}
+
+/* Validate PNG keyword */
+static int check_png_keyword(const char *str)
+{
+    if(str == NULL) return 1;
+    size_t len = strlen(str);
+    const char *end = str + len;
+
+    if(!len) return 1;
+    if(len > 79) return 1;
+    if(str[0] == ' ') return 1; /* Leading space */
+    if(end[-1] == ' ') return 1; /* Trailing space */
+    if(strstr(str, "  ") != NULL) return 1; /* Consecutive spaces */
+
+    uint8_t c;
+    while(str != end)
+    {
+        memcpy(&c, str, 1);
+
+        if( (c >= 32 && c <= 126) || (c >= 161) ) str++;
+        else return 1; /* Invalid character */
+    }
+
+    return 0;
+}
+
+/* Validate PNG text *str up to 'len' bytes */
+static int check_png_text(const char *str, size_t len)
+{/* XXX: are consecutive newlines permitted? */
+    if(str == NULL || len == 0) return 1;
+
+    uint8_t c;
+    size_t i = 0;
+    while(i < len)
+    {
+        memcpy(&c, str + i, 1);
+
+        if( (c >= 32 && c <= 126) || (c >= 161) || c == 10) i++;
+        else return 1; /* Invalid character */
+    }
+
+    return 0;
+}
+
+/* Returns non-zero for standard chunks which are stored without allocating memory */
+static int is_small_chunk(uint8_t type[4])
+{
+    if(!memcmp(type, type_plte, 4)) return 1;
+    else if(!memcmp(type, type_chrm, 4)) return 1;
+    else if(!memcmp(type, type_gama, 4)) return 1;
+    else if(!memcmp(type, type_sbit, 4)) return 1;
+    else if(!memcmp(type, type_srgb, 4)) return 1;
+    else if(!memcmp(type, type_bkgd, 4)) return 1;
+    else if(!memcmp(type, type_trns, 4)) return 1;
+    else if(!memcmp(type, type_hist, 4)) return 1;
+    else if(!memcmp(type, type_phys, 4)) return 1;
+    else if(!memcmp(type, type_time, 4)) return 1;
+    else if(!memcmp(type, type_offs, 4)) return 1;
+    else return 0;
+}
+
+static int read_ihdr(spng_ctx *ctx)
+{
+    int ret;
+    struct spng_chunk *chunk = &ctx->current_chunk;
+    const unsigned char *data;
+
+    chunk->offset = 8;
+    chunk->length = 13;
+    size_t sizeof_sig_ihdr = 29;
+
+    ret = read_data(ctx, sizeof_sig_ihdr);
+    if(ret) return ret;
+
+    data = ctx->data;
+
+    if(memcmp(data, spng_signature, sizeof(spng_signature))) return SPNG_ESIGNATURE;
+
+    chunk->length = read_u32(data + 8);
+    memcpy(&chunk->type, data + 12, 4);
+
+    if(chunk->length != 13) return SPNG_EIHDR_SIZE;
+    if(memcmp(chunk->type, type_ihdr, 4)) return SPNG_ENOIHDR;
+
+    ctx->cur_actual_crc = crc32(0, NULL, 0);
+    ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, data + 12, 17);
+
+    ctx->ihdr.width = read_u32(data + 16);
+    ctx->ihdr.height = read_u32(data + 20);
+    ctx->ihdr.bit_depth = data[24];
+    ctx->ihdr.color_type = data[25];
+    ctx->ihdr.compression_method = data[26];
+    ctx->ihdr.filter_method = data[27];
+    ctx->ihdr.interlace_method = data[28];
+
+    ret = check_ihdr(&ctx->ihdr, ctx->max_width, ctx->max_height);
+    if(ret) return ret;
+
+    ctx->file.ihdr = 1;
+    ctx->stored.ihdr = 1;
+
+    if(ctx->ihdr.bit_depth < 8) ctx->bytes_per_pixel = 1;
+    else ctx->bytes_per_pixel = num_channels(&ctx->ihdr) * (ctx->ihdr.bit_depth / 8);
+
+    ret = calculate_subimages(ctx);
+    if(ret) return ret;
+
+    return 0;
+}
+
+static void splt_undo(spng_ctx *ctx)
+{
+    struct spng_splt *splt = &ctx->splt_list[ctx->n_splt - 1];
+
+    spng__free(ctx, splt->entries);
+
+    decrease_cache_usage(ctx, sizeof(struct spng_splt));
+    decrease_cache_usage(ctx, splt->n_entries * sizeof(struct spng_splt_entry));
+
+    splt->entries = NULL;
+
+    ctx->n_splt--;
+}
+
+static void text_undo(spng_ctx *ctx)
+{
+    struct spng_text2 *text = &ctx->text_list[ctx->n_text - 1];
+
+    spng__free(ctx, text->keyword);
+    if(text->compression_flag) spng__free(ctx, text->text);
+
+    decrease_cache_usage(ctx, text->cache_usage);
+    decrease_cache_usage(ctx, sizeof(struct spng_text2));
+
+    text->keyword = NULL;
+    text->text = NULL;
+
+    ctx->n_text--;
+}
+
+static void chunk_undo(spng_ctx *ctx)
+{
+    struct spng_unknown_chunk *chunk = &ctx->chunk_list[ctx->n_chunks - 1];
+
+    spng__free(ctx, chunk->data);
+
+    decrease_cache_usage(ctx, chunk->length);
+    decrease_cache_usage(ctx, sizeof(struct spng_unknown_chunk));
+
+    chunk->data = NULL;
+
+    ctx->n_chunks--;
+}
+
+static int read_non_idat_chunks(spng_ctx *ctx)
+{
+    int ret;
+    struct spng_chunk chunk;
+    const unsigned char *data;
+
+    ctx->discard = 0;
+    ctx->undo = NULL;
+    ctx->prev_stored = ctx->stored;
+
+    while( !(ret = read_header(ctx)))
+    {
+        if(ctx->discard)
+        {
+            if(ctx->undo) ctx->undo(ctx);
+
+            ctx->stored = ctx->prev_stored;
+        }
+
+        ctx->discard = 0;
+        ctx->undo = NULL;
+
+        ctx->prev_stored = ctx->stored;
+        chunk = ctx->current_chunk;
+
+        if(!memcmp(chunk.type, type_idat, 4))
+        {
+            if(ctx->state < SPNG_STATE_FIRST_IDAT)
+            {
+                if(ctx->ihdr.color_type == 3 && !ctx->stored.plte) return SPNG_ENOPLTE;
+
+                ctx->first_idat = chunk;
+                return 0;
+            }
+
+            if(ctx->prev_was_idat)
+            {
+                /* Ignore extra IDAT's */
+                ret = discard_chunk_bytes(ctx, chunk.length);
+                if(ret) return ret;
+
+                continue;
+            }
+            else return SPNG_ECHUNK_POS; /* IDAT chunk not at the end of the IDAT sequence */
+        }
+
+        ctx->prev_was_idat = 0;
+
+        if(is_small_chunk(chunk.type))
+        {
+            /* None of the known chunks can be zero length */
+            if(!chunk.length) return SPNG_ECHUNK_SIZE;
+
+            /* The largest of these chunks is PLTE with 256 entries */
+            ret = read_chunk_bytes(ctx, chunk.length > 768 ? 768 : chunk.length);
+            if(ret) return ret;
+        }
+
+        data = ctx->data;
+
+        if(is_critical_chunk(&chunk))
+        {
+            if(!memcmp(chunk.type, type_plte, 4))
+            {
+                if(ctx->file.trns || ctx->file.hist || ctx->file.bkgd) return SPNG_ECHUNK_POS;
+                if(chunk.length % 3 != 0) return SPNG_ECHUNK_SIZE;
+
+                ctx->plte.n_entries = chunk.length / 3;
+
+                if(check_plte(&ctx->plte, &ctx->ihdr)) return SPNG_ECHUNK_SIZE; /* XXX: EPLTE? */
+
+                size_t i;
+                for(i=0; i < ctx->plte.n_entries; i++)
+                {
+                    ctx->plte.entries[i].red   = data[i * 3];
+                    ctx->plte.entries[i].green = data[i * 3 + 1];
+                    ctx->plte.entries[i].blue  = data[i * 3 + 2];
+                }
+
+                ctx->file.plte = 1;
+                ctx->stored.plte = 1;
+            }
+            else if(!memcmp(chunk.type, type_iend, 4))
+            {
+                if(ctx->state == SPNG_STATE_AFTER_IDAT)
+                {
+                    if(chunk.length) return SPNG_ECHUNK_SIZE;
+
+                    ret = read_and_check_crc(ctx);
+                    if(ret == -SPNG_CRC_DISCARD) ret = 0;
+
+                    return ret;
+                }
+                else return SPNG_ECHUNK_POS;
+            }
+            else if(!memcmp(chunk.type, type_ihdr, 4)) return SPNG_ECHUNK_POS;
+            else return SPNG_ECHUNK_UNKNOWN_CRITICAL;
+        }
+        else if(!memcmp(chunk.type, type_chrm, 4)) /* Ancillary chunks */
+        {
+            if(ctx->file.plte) return SPNG_ECHUNK_POS;
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.chrm) return SPNG_EDUP_CHRM;
+
+            if(chunk.length != 32) return SPNG_ECHUNK_SIZE;
+
+            ctx->chrm_int.white_point_x = read_u32(data);
+            ctx->chrm_int.white_point_y = read_u32(data + 4);
+            ctx->chrm_int.red_x = read_u32(data + 8);
+            ctx->chrm_int.red_y = read_u32(data + 12);
+            ctx->chrm_int.green_x = read_u32(data + 16);
+            ctx->chrm_int.green_y = read_u32(data + 20);
+            ctx->chrm_int.blue_x = read_u32(data + 24);
+            ctx->chrm_int.blue_y = read_u32(data + 28);
+
+            if(check_chrm_int(&ctx->chrm_int)) return SPNG_ECHRM;
+
+            ctx->file.chrm = 1;
+            ctx->stored.chrm = 1;
+        }
+        else if(!memcmp(chunk.type, type_gama, 4))
+        {
+            if(ctx->file.plte) return SPNG_ECHUNK_POS;
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.gama) return SPNG_EDUP_GAMA;
+
+            if(chunk.length != 4) return SPNG_ECHUNK_SIZE;
+
+            ctx->gama = read_u32(data);
+
+            if(!ctx->gama) return SPNG_EGAMA;
+            if(ctx->gama > spng_u32max) return SPNG_EGAMA;
+
+            ctx->file.gama = 1;
+            ctx->stored.gama = 1;
+        }
+        else if(!memcmp(chunk.type, type_sbit, 4))
+        {
+            if(ctx->file.plte) return SPNG_ECHUNK_POS;
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.sbit) return SPNG_EDUP_SBIT;
+
+            if(ctx->ihdr.color_type == 0)
+            {
+                if(chunk.length != 1) return SPNG_ECHUNK_SIZE;
+
+                ctx->sbit.grayscale_bits = data[0];
+            }
+            else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 3)
+            {
+                if(chunk.length != 3) return SPNG_ECHUNK_SIZE;
+
+                ctx->sbit.red_bits = data[0];
+                ctx->sbit.green_bits = data[1];
+                ctx->sbit.blue_bits = data[2];
+            }
+            else if(ctx->ihdr.color_type == 4)
+            {
+                if(chunk.length != 2) return SPNG_ECHUNK_SIZE;
+
+                ctx->sbit.grayscale_bits = data[0];
+                ctx->sbit.alpha_bits = data[1];
+            }
+            else if(ctx->ihdr.color_type == 6)
+            {
+                if(chunk.length != 4) return SPNG_ECHUNK_SIZE;
+
+                ctx->sbit.red_bits = data[0];
+                ctx->sbit.green_bits = data[1];
+                ctx->sbit.blue_bits = data[2];
+                ctx->sbit.alpha_bits = data[3];
+            }
+
+            if(check_sbit(&ctx->sbit, &ctx->ihdr)) return SPNG_ESBIT;
+
+            ctx->file.sbit = 1;
+            ctx->stored.sbit = 1;
+        }
+        else if(!memcmp(chunk.type, type_srgb, 4))
+        {
+            if(ctx->file.plte) return SPNG_ECHUNK_POS;
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.srgb) return SPNG_EDUP_SRGB;
+
+            if(chunk.length != 1) return SPNG_ECHUNK_SIZE;
+
+            ctx->srgb_rendering_intent = data[0];
+
+            if(ctx->srgb_rendering_intent > 3) return SPNG_ESRGB;
+
+            ctx->file.srgb = 1;
+            ctx->stored.srgb = 1;
+        }
+        else if(!memcmp(chunk.type, type_bkgd, 4))
+        {
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.bkgd) return SPNG_EDUP_BKGD;
+
+            if(ctx->ihdr.color_type == 0 || ctx->ihdr.color_type == 4)
+            {
+                if(chunk.length != 2) return SPNG_ECHUNK_SIZE;
+
+                ctx->bkgd.gray = read_u16(data);
+            }
+            else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 6)
+            {
+                if(chunk.length != 6) return SPNG_ECHUNK_SIZE;
+
+                ctx->bkgd.red = read_u16(data);
+                ctx->bkgd.green = read_u16(data + 2);
+                ctx->bkgd.blue = read_u16(data + 4);
+            }
+            else if(ctx->ihdr.color_type == 3)
+            {
+                if(chunk.length != 1) return SPNG_ECHUNK_SIZE;
+                if(!ctx->file.plte) return SPNG_EBKGD_NO_PLTE;
+
+                ctx->bkgd.plte_index = data[0];
+                if(ctx->bkgd.plte_index >= ctx->plte.n_entries) return SPNG_EBKGD_PLTE_IDX;
+            }
+
+            ctx->file.bkgd = 1;
+            ctx->stored.bkgd = 1;
+        }
+        else if(!memcmp(chunk.type, type_trns, 4))
+        {
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.trns) return SPNG_EDUP_TRNS;
+            if(!chunk.length) return SPNG_ECHUNK_SIZE;
+
+            if(ctx->ihdr.color_type == 0)
+            {
+                if(chunk.length != 2) return SPNG_ECHUNK_SIZE;
+
+                ctx->trns.gray = read_u16(data);
+            }
+            else if(ctx->ihdr.color_type == 2)
+            {
+                if(chunk.length != 6) return SPNG_ECHUNK_SIZE;
+
+                ctx->trns.red = read_u16(data);
+                ctx->trns.green = read_u16(data + 2);
+                ctx->trns.blue = read_u16(data + 4);
+            }
+            else if(ctx->ihdr.color_type == 3)
+            {
+                if(chunk.length > ctx->plte.n_entries) return SPNG_ECHUNK_SIZE;
+                if(!ctx->file.plte) return SPNG_ETRNS_NO_PLTE;
+
+                memcpy(ctx->trns.type3_alpha, data, chunk.length);
+                ctx->trns.n_type3_entries = chunk.length;
+            }
+
+            if(ctx->ihdr.color_type == 4 || ctx->ihdr.color_type == 6)  return SPNG_ETRNS_COLOR_TYPE;
+
+            ctx->file.trns = 1;
+            ctx->stored.trns = 1;
+        }
+        else if(!memcmp(chunk.type, type_hist, 4))
+        {
+            if(!ctx->file.plte) return SPNG_EHIST_NO_PLTE;
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.hist) return SPNG_EDUP_HIST;
+
+            if( (chunk.length / 2) != (ctx->plte.n_entries) ) return SPNG_ECHUNK_SIZE;
+
+            size_t k;
+            for(k=0; k < (chunk.length / 2); k++)
+            {
+                ctx->hist.frequency[k] = read_u16(data + k*2);
+            }
+
+            ctx->file.hist = 1;
+            ctx->stored.hist = 1;
+        }
+        else if(!memcmp(chunk.type, type_phys, 4))
+        {
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.phys) return SPNG_EDUP_PHYS;
+
+            if(chunk.length != 9) return SPNG_ECHUNK_SIZE;
+
+            ctx->phys.ppu_x = read_u32(data);
+            ctx->phys.ppu_y = read_u32(data + 4);
+            ctx->phys.unit_specifier = data[8];
+
+            if(check_phys(&ctx->phys)) return SPNG_EPHYS;
+
+            ctx->file.phys = 1;
+            ctx->stored.phys = 1;
+        }
+        else if(!memcmp(chunk.type, type_time, 4))
+        {
+            if(ctx->file.time) return SPNG_EDUP_TIME;
+
+            if(chunk.length != 7) return SPNG_ECHUNK_SIZE;
+
+            struct spng_time time;
+
+            time.year = read_u16(data);
+            time.month = data[2];
+            time.day = data[3];
+            time.hour = data[4];
+            time.minute = data[5];
+            time.second = data[6];
+
+            if(check_time(&time)) return SPNG_ETIME;
+
+            ctx->file.time = 1;
+
+            if(!ctx->user.time) ctx->time = time;
+
+            ctx->stored.time = 1;
+        }
+        else if(!memcmp(chunk.type, type_offs, 4))
+        {
+            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+            if(ctx->file.offs) return SPNG_EDUP_OFFS;
+
+            if(chunk.length != 9) return SPNG_ECHUNK_SIZE;
+
+            ctx->offs.x = read_s32(data);
+            ctx->offs.y = read_s32(data + 4);
+            ctx->offs.unit_specifier = data[8];
+
+            if(check_offs(&ctx->offs)) return SPNG_EOFFS;
+
+            ctx->file.offs = 1;
+            ctx->stored.offs = 1;
+        }
+        else /* Arbitrary-length chunk */
+        {
+
+            if(!memcmp(chunk.type, type_exif, 4))
+            {
+                if(ctx->file.exif) return SPNG_EDUP_EXIF;
+                if(!chunk.length) return SPNG_EEXIF;
+
+                ctx->file.exif = 1;
+
+                if(ctx->user.exif) goto discard;
+
+                if(increase_cache_usage(ctx, chunk.length, 1)) return SPNG_ECHUNK_LIMITS;
+
+                struct spng_exif exif;
+
+                exif.length = chunk.length;
+
+                exif.data = spng__malloc(ctx, chunk.length);
+                if(exif.data == NULL) return SPNG_EMEM;
+
+                ret = read_chunk_bytes2(ctx, exif.data, chunk.length);
+                if(ret)
+                {
+                    spng__free(ctx, exif.data);
+                    return ret;
+                }
+
+                if(check_exif(&exif))
+                {
+                    spng__free(ctx, exif.data);
+                    return SPNG_EEXIF;
+                }
+
+                ctx->exif = exif;
+
+                ctx->stored.exif = 1;
+            }
+            else if(!memcmp(chunk.type, type_iccp, 4))
+            {/* TODO: add test file with color profile */
+                if(ctx->file.plte) return SPNG_ECHUNK_POS;
+                if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+                if(ctx->file.iccp) return SPNG_EDUP_ICCP;
+                if(!chunk.length) return SPNG_ECHUNK_SIZE;
+
+                ctx->file.iccp = 1;
+
+                uint32_t peek_bytes =  81 > chunk.length ? chunk.length : 81;
+
+                ret = read_chunk_bytes(ctx, peek_bytes);
+                if(ret) return ret;
+
+                unsigned char *keyword_nul = memchr(ctx->data, '\0', peek_bytes);
+                if(keyword_nul == NULL) return SPNG_EICCP_NAME;
+
+                uint32_t keyword_len = keyword_nul - ctx->data;
+
+                if(keyword_len > 79) return SPNG_EICCP_NAME;
+
+                memcpy(ctx->iccp.profile_name, ctx->data, keyword_len);
+
+                if(check_png_keyword(ctx->iccp.profile_name)) return SPNG_EICCP_NAME;
+
+                if(chunk.length < (keyword_len + 2)) return SPNG_ECHUNK_SIZE;
+
+                if(ctx->data[keyword_len + 1] != 0) return SPNG_EICCP_COMPRESSION_METHOD;
+
+                ret = spng__inflate_stream(ctx, &ctx->iccp.profile, &ctx->iccp.profile_len, 0, ctx->data + keyword_len + 2, peek_bytes - (keyword_len + 2));
+
+                if(ret) return ret;
+
+                ctx->stored.iccp = 1;
+            }
+             else if(!memcmp(chunk.type, type_text, 4) ||
+                     !memcmp(chunk.type, type_ztxt, 4) ||
+                     !memcmp(chunk.type, type_itxt, 4))
+            {
+                if(!chunk.length) return SPNG_ECHUNK_SIZE;
+
+                ctx->file.text = 1;
+
+                if(ctx->user.text) goto discard;
+
+                if(increase_cache_usage(ctx, sizeof(struct spng_text2), 1)) return SPNG_ECHUNK_LIMITS;
+
+                ctx->n_text++;
+                if(ctx->n_text < 1) return SPNG_EOVERFLOW;
+                if(sizeof(struct spng_text2) > SIZE_MAX / ctx->n_text) return SPNG_EOVERFLOW;
+
+                void *buf = spng__realloc(ctx, ctx->text_list, ctx->n_text * sizeof(struct spng_text2));
+                if(buf == NULL) return SPNG_EMEM;
+                ctx->text_list = buf;
+
+                struct spng_text2 *text = &ctx->text_list[ctx->n_text - 1];
+                memset(text, 0, sizeof(struct spng_text2));
+
+                ctx->undo = text_undo;
+
+                uint32_t text_offset = 0, language_tag_offset = 0, translated_keyword_offset = 0;
+                uint32_t peek_bytes = 256; /* enough for 3 80-byte keywords and some text bytes */
+                uint32_t keyword_len;
+
+                if(peek_bytes > chunk.length) peek_bytes = chunk.length;
+
+                ret = read_chunk_bytes(ctx, peek_bytes);
+                if(ret) return ret;
+
+                data = ctx->data;
+
+                const unsigned char *zlib_stream = NULL;
+                const unsigned char *peek_end = data + peek_bytes;
+                const unsigned char *keyword_nul = memchr(data, 0, chunk.length > 80 ? 80 : chunk.length);
+
+                if(keyword_nul == NULL) return SPNG_ETEXT_KEYWORD;
+
+                keyword_len = keyword_nul - data;
+
+                if(!memcmp(chunk.type, type_text, 4))
+                {
+                    text->type = SPNG_TEXT;
+
+                    text->text_length = chunk.length - keyword_len - 1;
+
+                    text_offset = keyword_len;
+
+                    /* increment past nul if there is a text field */
+                    if(text->text_length) text_offset++;
+                }
+                else if(!memcmp(chunk.type, type_ztxt, 4))
+                {
+                    text->type = SPNG_ZTXT;
+
+                    if((peek_bytes - keyword_len) <= 2) return SPNG_EZTXT;
+
+                    if(keyword_nul[1]) return SPNG_EZTXT_COMPRESSION_METHOD;
+
+                    text->compression_flag = 1;
+
+                    text_offset = keyword_len + 2;
+                }
+                else if(!memcmp(chunk.type, type_itxt, 4))
+                {
+                    text->type = SPNG_ITXT;
+
+                    /* at least two 1-byte fields, two >=0 length strings, and one byte of (compressed) text */
+                    if((peek_bytes - keyword_len) < 5) return SPNG_EITXT;
+
+                    text->compression_flag = keyword_nul[1];
+
+                    if(text->compression_flag > 1) return SPNG_EITXT_COMPRESSION_FLAG;
+
+                    if(keyword_nul[2]) return SPNG_EITXT_COMPRESSION_METHOD;
+
+                    language_tag_offset = keyword_len + 3;
+
+                    const unsigned char *term;
+                    term = memchr(data + language_tag_offset, 0, peek_bytes - language_tag_offset);
+                    if(term == NULL) return SPNG_EITXT_LANG_TAG;
+
+                    if((peek_end - term) < 2) return SPNG_EITXT;
+
+                    translated_keyword_offset = term - data + 1;
+
+                    zlib_stream = memchr(data + translated_keyword_offset, 0, peek_bytes - translated_keyword_offset);
+                    if(zlib_stream == NULL) return SPNG_EITXT;
+                    if(zlib_stream == peek_end) return SPNG_EITXT;
+
+                    text_offset = zlib_stream - data + 1;
+                    text->text_length = chunk.length - text_offset;
+                }
+                else return SPNG_EINTERNAL;
+
+
+                if(text->compression_flag)
+                {
+                    /* cache usage = peek_bytes + decompressed text size + nul */
+                    if(increase_cache_usage(ctx, peek_bytes, 0)) return SPNG_ECHUNK_LIMITS;
+
+                    text->keyword = spng__calloc(ctx, 1, peek_bytes);
+                    if(text->keyword == NULL) return SPNG_EMEM;
+
+                    memcpy(text->keyword, data, peek_bytes);
+
+                    zlib_stream = ctx->data + text_offset;
+
+                    ret = spng__inflate_stream(ctx, &text->text, &text->text_length, 1, zlib_stream, peek_bytes - text_offset);
+
+                    if(ret) return ret;
+
+                    text->text[text->text_length - 1] = '\0';
+                    text->cache_usage = text->text_length + peek_bytes;
+                }
+                else
+                {
+                    if(increase_cache_usage(ctx, chunk.length + 1, 0)) return SPNG_ECHUNK_LIMITS;
+
+                    text->keyword = spng__malloc(ctx, chunk.length + 1);
+                    if(text->keyword == NULL) return SPNG_EMEM;
+
+                    memcpy(text->keyword, data, peek_bytes);
+
+                    if(chunk.length > peek_bytes)
+                    {
+                        ret = read_chunk_bytes2(ctx, text->keyword + peek_bytes, chunk.length - peek_bytes);
+                        if(ret) return ret;
+                    }
+
+                    text->text = text->keyword + text_offset;
+
+                    text->text_length = chunk.length - text_offset;
+
+                    text->text[text->text_length] = '\0';
+                    text->cache_usage = chunk.length + 1;
+                }
+
+                if(check_png_keyword(text->keyword)) return SPNG_ETEXT_KEYWORD;
+
+                text->text_length = strlen(text->text);
+
+                if(text->type != SPNG_ITXT)
+                {
+                    language_tag_offset = keyword_len;
+                    translated_keyword_offset = keyword_len;
+
+                    if(ctx->strict && check_png_text(text->text, text->text_length))
+                    {
+                        if(text->type == SPNG_ZTXT) return SPNG_EZTXT;
+                        else return SPNG_ETEXT;
+                    }
+                }
+
+                text->language_tag = text->keyword + language_tag_offset;
+                text->translated_keyword = text->keyword + translated_keyword_offset;
+
+                ctx->stored.text = 1;
+            }
+            else if(!memcmp(chunk.type, type_splt, 4))
+            {
+                if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
+                if(ctx->user.splt) goto discard; /* XXX: could check profile names for uniqueness */
+                if(!chunk.length) return SPNG_ECHUNK_SIZE;
+
+                ctx->file.splt = 1;
+
+                /* chunk.length + sizeof(struct spng_splt) + splt->n_entries * sizeof(struct spng_splt_entry) */
+                if(increase_cache_usage(ctx, chunk.length + sizeof(struct spng_splt), 1)) return SPNG_ECHUNK_LIMITS;
+
+                ctx->n_splt++;
+                if(ctx->n_splt < 1) return SPNG_EOVERFLOW;
+                if(sizeof(struct spng_splt) > SIZE_MAX / ctx->n_splt) return SPNG_EOVERFLOW;
+
+                void *buf = spng__realloc(ctx, ctx->splt_list, ctx->n_splt * sizeof(struct spng_splt));
+                if(buf == NULL) return SPNG_EMEM;
+                ctx->splt_list = buf;
+
+                struct spng_splt *splt = &ctx->splt_list[ctx->n_splt - 1];
+
+                memset(splt, 0, sizeof(struct spng_splt));
+
+                ctx->undo = splt_undo;
+
+                void *t = spng__malloc(ctx, chunk.length);
+                if(t == NULL) return SPNG_EMEM;
+
+                splt->entries = t; /* simplifies error handling */
+                data = t;
+
+                ret = read_chunk_bytes2(ctx, t, chunk.length);
+                if(ret) return ret;
+
+                uint32_t keyword_len = chunk.length < 80 ? chunk.length : 80;
+
+                const unsigned char *keyword_nul = memchr(data, 0, keyword_len);
+                if(keyword_nul == NULL) return SPNG_ESPLT_NAME;
+
+                keyword_len = keyword_nul - data;
+
+                memcpy(splt->name, data, keyword_len);
+
+                if(check_png_keyword(splt->name)) return SPNG_ESPLT_NAME;
+
+                uint32_t j;
+                for(j=0; j < (ctx->n_splt - 1); j++)
+                {
+                    if(!strcmp(ctx->splt_list[j].name, splt->name)) return SPNG_ESPLT_DUP_NAME;
+                }
+
+                if( (chunk.length - keyword_len) <= 2) return SPNG_ECHUNK_SIZE;
+
+                splt->sample_depth = data[keyword_len + 1];
+
+                uint32_t entries_len = chunk.length - keyword_len - 2;
+                if(!entries_len) return SPNG_ECHUNK_SIZE;
+
+                if(splt->sample_depth == 16)
+                {
+                    if(entries_len % 10 != 0) return SPNG_ECHUNK_SIZE;
+                    splt->n_entries = entries_len / 10;
+                }
+                else if(splt->sample_depth == 8)
+                {
+                    if(entries_len % 6 != 0) return SPNG_ECHUNK_SIZE;
+                    splt->n_entries = entries_len / 6;
+                }
+                else return SPNG_ESPLT_DEPTH;
+
+                if(!splt->n_entries) return SPNG_ECHUNK_SIZE;
+
+                size_t list_size = splt->n_entries;
+
+                if(list_size > SIZE_MAX / sizeof(struct spng_splt_entry)) return SPNG_EOVERFLOW;
+
+                list_size *= sizeof(struct spng_splt_entry);
+
+                if(increase_cache_usage(ctx, list_size, 0)) return SPNG_ECHUNK_LIMITS;
+
+                splt->entries = spng__malloc(ctx, list_size);
+                if(splt->entries == NULL)
+                {
+                    spng__free(ctx, t);
+                    return SPNG_EMEM;
+                }
+
+                data = (unsigned char*)t + keyword_len + 2;
+
+                uint32_t k;
+                if(splt->sample_depth == 16)
+                {
+                    for(k=0; k < splt->n_entries; k++)
+                    {
+                        splt->entries[k].red =   read_u16(data + k * 10);
+                        splt->entries[k].green = read_u16(data + k * 10 + 2);
+                        splt->entries[k].blue =  read_u16(data + k * 10 + 4);
+                        splt->entries[k].alpha = read_u16(data + k * 10 + 6);
+                        splt->entries[k].frequency = read_u16(data + k * 10 + 8);
+                    }
+                }
+                else if(splt->sample_depth == 8)
+                {
+                    for(k=0; k < splt->n_entries; k++)
+                    {
+                        splt->entries[k].red =   data[k * 6];
+                        splt->entries[k].green = data[k * 6 + 1];
+                        splt->entries[k].blue =  data[k * 6 + 2];
+                        splt->entries[k].alpha = data[k * 6 + 3];
+                        splt->entries[k].frequency = read_u16(data + k * 6 + 4);
+                    }
+                }
+
+                spng__free(ctx, t);
+                decrease_cache_usage(ctx, chunk.length);
+
+                ctx->stored.splt = 1;
+            }
+            else /* Unknown chunk */
+            {
+                ctx->file.unknown = 1;
+
+                if(!ctx->keep_unknown) goto discard;
+                if(ctx->user.unknown) goto discard;
+
+                if(increase_cache_usage(ctx, chunk.length + sizeof(struct spng_unknown_chunk), 1)) return SPNG_ECHUNK_LIMITS;
+
+                ctx->n_chunks++;
+                if(ctx->n_chunks < 1) return SPNG_EOVERFLOW;
+                if(sizeof(struct spng_unknown_chunk) > SIZE_MAX / ctx->n_chunks) return SPNG_EOVERFLOW;
+
+                void *buf = spng__realloc(ctx, ctx->chunk_list, ctx->n_chunks * sizeof(struct spng_unknown_chunk));
+                if(buf == NULL) return SPNG_EMEM;
+                ctx->chunk_list = buf;
+
+                struct spng_unknown_chunk *chunkp = &ctx->chunk_list[ctx->n_chunks - 1];
+
+                memset(chunkp, 0, sizeof(struct spng_unknown_chunk));
+
+                ctx->undo = chunk_undo;
+
+                memcpy(chunkp->type, chunk.type, 4);
+
+                if(ctx->state < SPNG_STATE_FIRST_IDAT)
+                {
+                    if(ctx->file.plte) chunkp->location = SPNG_AFTER_PLTE;
+                    else chunkp->location = SPNG_AFTER_IHDR;
+                }
+                else if(ctx->state >= SPNG_STATE_AFTER_IDAT) chunkp->location = SPNG_AFTER_IDAT;
+
+                if(chunk.length > 0)
+                {
+                    void *t = spng__malloc(ctx, chunk.length);
+                    if(t == NULL) return SPNG_EMEM;
+
+                    ret = read_chunk_bytes2(ctx, t, chunk.length);
+                    if(ret)
+                    {
+                        spng__free(ctx, t);
+                        return ret;
+                    }
+
+                    chunkp->length = chunk.length;
+                    chunkp->data = t;
+                }
+
+                ctx->stored.unknown = 1;
+            }
+
+discard:
+            ret = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left);
+            if(ret) return ret;
+        }
+
+    }
+
+    return ret;
+}
+
+/* Read chunks before or after the IDAT chunks depending on state */
+static int read_chunks(spng_ctx *ctx, int only_ihdr)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+    if(!ctx->state) return SPNG_EBADSTATE;
+    if(ctx->data == NULL)
+    {
+        if(ctx->encode_only) return 0;
+        else return SPNG_EINTERNAL;
+    }
+
+    int ret = 0;
+
+    if(ctx->state == SPNG_STATE_INPUT)
+    {
+        ret = read_ihdr(ctx);
+
+        if(ret) return decode_err(ctx, ret);
+
+        ctx->state = SPNG_STATE_IHDR;
+    }
+
+    if(only_ihdr) return 0;
+
+    if(ctx->state == SPNG_STATE_EOI)
+    {
+        ctx->state = SPNG_STATE_AFTER_IDAT;
+        ctx->prev_was_idat = 1;
+    }
+
+    while(ctx->state < SPNG_STATE_FIRST_IDAT || ctx->state == SPNG_STATE_AFTER_IDAT)
+    {
+        ret = read_non_idat_chunks(ctx);
+
+        if(!ret)
+        {
+            if(ctx->state < SPNG_STATE_FIRST_IDAT) ctx->state = SPNG_STATE_FIRST_IDAT;
+            else if(ctx->state == SPNG_STATE_AFTER_IDAT) ctx->state = SPNG_STATE_IEND;
+        }
+        else
+        {
+            switch(ret)
+            {
+                case SPNG_ECHUNK_POS:
+                case SPNG_ECHUNK_SIZE: /* size != expected size, SPNG_ECHUNK_STDLEN = invalid size */
+                case SPNG_EDUP_PLTE:
+                case SPNG_EDUP_CHRM:
+                case SPNG_EDUP_GAMA:
+                case SPNG_EDUP_ICCP:
+                case SPNG_EDUP_SBIT:
+                case SPNG_EDUP_SRGB:
+                case SPNG_EDUP_BKGD:
+                case SPNG_EDUP_HIST:
+                case SPNG_EDUP_TRNS:
+                case SPNG_EDUP_PHYS:
+                case SPNG_EDUP_TIME:
+                case SPNG_EDUP_OFFS:
+                case SPNG_EDUP_EXIF:
+                case SPNG_ECHRM:
+                case SPNG_ETRNS_COLOR_TYPE:
+                case SPNG_ETRNS_NO_PLTE:
+                case SPNG_EGAMA:
+                case SPNG_EICCP_NAME:
+                case SPNG_EICCP_COMPRESSION_METHOD:
+                case SPNG_ESBIT:
+                case SPNG_ESRGB:
+                case SPNG_ETEXT:
+                case SPNG_ETEXT_KEYWORD:
+                case SPNG_EZTXT:
+                case SPNG_EZTXT_COMPRESSION_METHOD:
+                case SPNG_EITXT:
+                case SPNG_EITXT_COMPRESSION_FLAG:
+                case SPNG_EITXT_COMPRESSION_METHOD:
+                case SPNG_EITXT_LANG_TAG:
+                case SPNG_EITXT_TRANSLATED_KEY:
+                case SPNG_EBKGD_NO_PLTE:
+                case SPNG_EBKGD_PLTE_IDX:
+                case SPNG_EHIST_NO_PLTE:
+                case SPNG_EPHYS:
+                case SPNG_ESPLT_NAME:
+                case SPNG_ESPLT_DUP_NAME:
+                case SPNG_ESPLT_DEPTH:
+                case SPNG_ETIME:
+                case SPNG_EOFFS:
+                case SPNG_EEXIF:
+                case SPNG_EZLIB:
+                {
+                    if(!ctx->strict && !is_critical_chunk(&ctx->current_chunk))
+                    {
+                        ret = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left);
+                        if(ret) return decode_err(ctx, ret);
+
+                        if(ctx->undo) ctx->undo(ctx);
+
+                        ctx->stored = ctx->prev_stored;
+
+                        ctx->discard = 0;
+                        ctx->undo = NULL;
+
+                        continue;
+                    }
+                    else return decode_err(ctx, ret);
+
+                    break;
+                }
+                default: return decode_err(ctx, ret);
+            }
+        }
+    }
+
+    return ret;
+}
+
+static int read_scanline(spng_ctx *ctx)
+{
+    int ret, pass = ctx->row_info.pass;
+    struct spng_row_info *ri = &ctx->row_info;
+    const struct spng_subimage *sub = ctx->subimage;
+    size_t scanline_width = sub[pass].scanline_width;
+    uint32_t scanline_idx = ri->scanline_idx;
+
+    uint8_t next_filter = 0;
+
+    if(scanline_idx == (sub[pass].height - 1) && ri->pass == ctx->last_pass)
+    {
+        ret = read_scanline_bytes(ctx, ctx->scanline, scanline_width - 1);
+    }
+    else
+    {
+        ret = read_scanline_bytes(ctx, ctx->scanline, scanline_width);
+        if(ret) return ret;
+
+        next_filter = ctx->scanline[scanline_width - 1];
+        if(next_filter > 4) ret = SPNG_EFILTER;
+    }
+
+    if(ret) return ret;
+
+    if(!scanline_idx && ri->filter > 1)
+    {
+        /* prev_scanline is all zeros for the first scanline */
+        memset(ctx->prev_scanline, 0, scanline_width);
+    }
+
+    if(ctx->ihdr.bit_depth == 16 && ctx->fmt != SPNG_FMT_RAW) u16_row_to_host(ctx->scanline, scanline_width - 1);
+
+    ret = defilter_scanline(ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, ri->filter);
+    if(ret) return ret;
+
+    ri->filter = next_filter;
+
+    return 0;
+}
+
+static int update_row_info(spng_ctx *ctx)
+{
+    int interlacing = ctx->ihdr.interlace_method;
+    struct spng_row_info *ri = &ctx->row_info;
+    const struct spng_subimage *sub = ctx->subimage;
+
+    if(ri->scanline_idx == (sub[ri->pass].height - 1)) /* Last scanline */
+    {
+        if(ri->pass == ctx->last_pass)
+        {
+            ctx->state = SPNG_STATE_EOI;
+
+            return SPNG_EOI;
+        }
+
+        ri->scanline_idx = 0;
+        ri->pass++;
+
+        /* Skip empty passes */
+        while( (!sub[ri->pass].width || !sub[ri->pass].height) && (ri->pass < ctx->last_pass)) ri->pass++;
+    }
+    else
+    {
+        ri->row_num++;
+        ri->scanline_idx++;
+    }
+
+    if(interlacing) ri->row_num = adam7_y_start[ri->pass] + ri->scanline_idx * adam7_y_delta[ri->pass];
+
+    return 0;
+}
+
+int spng_decode_scanline(spng_ctx *ctx, void *out, size_t len)
+{
+    if(ctx == NULL || out == NULL) return 1;
+
+    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
+
+    struct decode_flags f = ctx->decode_flags;
+
+    struct spng_row_info *ri = &ctx->row_info;
+    const struct spng_subimage *sub = ctx->subimage;
+
+    const struct spng_ihdr *ihdr = &ctx->ihdr;
+    const uint16_t *gamma_lut = ctx->gamma_lut;
+    unsigned char *trns_px = ctx->trns_px;
+    const struct spng_sbit *sb = &ctx->decode_sb;
+    const struct spng_plte_entry *plte = ctx->decode_plte.rgba;
+    struct spng__iter iter = spng__iter_init(ihdr->bit_depth, ctx->scanline);
+
+    const unsigned char *scanline;
+
+    const int pass = ri->pass;
+    const int fmt = ctx->fmt;
+    const size_t scanline_width = sub[pass].scanline_width;
+    const uint32_t width = sub[pass].width;
+    uint32_t k;
+    uint8_t r_8, g_8, b_8, a_8, gray_8;
+    uint16_t r_16, g_16, b_16, a_16, gray_16;
+    r_8=0; g_8=0; b_8=0; a_8=0; gray_8=0;
+    r_16=0; g_16=0; b_16=0; a_16=0; gray_16=0;
+    size_t pixel_size = 4; /* SPNG_FMT_RGBA8 */
+    size_t pixel_offset = 0;
+    unsigned char *pixel;
+    unsigned processing_depth = ihdr->bit_depth;
+
+    if(f.indexed) processing_depth = 8;
+
+    if(fmt == SPNG_FMT_RGBA16) pixel_size = 8;
+    else if(fmt == SPNG_FMT_RGB8) pixel_size = 3;
+
+    if(len < sub[pass].out_width) return SPNG_EBUFSIZ;
+
+    int ret = read_scanline(ctx);
+
+    if(ret) return decode_err(ctx, ret);
+
+    scanline = ctx->scanline;
+
+    for(k=0; k < width; k++)
+    {
+        pixel = (unsigned char*)out + pixel_offset;
+        pixel_offset += pixel_size;
+
+        if(f.same_layout)
+        {
+            if(f.zerocopy) break;
+
+            memcpy(out, scanline, scanline_width - 1);
+            break;
+        }
+
+        if(f.unpack)
+        {
+            unpack_scanline(out, scanline, width, ihdr->bit_depth, fmt);
+            break;
+        }
+
+        if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR)
+        {
+            if(ihdr->bit_depth == 16)
+            {
+                memcpy(&r_16, scanline + (k * 6), 2);
+                memcpy(&g_16, scanline + (k * 6) + 2, 2);
+                memcpy(&b_16, scanline + (k * 6) + 4, 2);
+
+                a_16 = 65535;
+            }
+            else /* == 8 */
+            {
+                if(fmt == SPNG_FMT_RGBA8)
+                {
+                    rgb8_row_to_rgba8(scanline, out, width);
+                    break;
+                }
+
+                r_8 = scanline[k * 3];
+                g_8 = scanline[k * 3 + 1];
+                b_8 = scanline[k * 3 + 2];
+
+                a_8 = 255;
+            }
+        }
+        else if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED)
+        {
+            uint8_t entry = 0;
+
+            if(ihdr->bit_depth == 8)
+            {
+                if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
+                {
+                    expand_row(out, scanline, &ctx->decode_plte, width, fmt);
+                    break;
+                }
+
+                entry = scanline[k];
+            }
+            else /* < 8 */
+            {
+                entry = get_sample(&iter);
+            }
+
+            if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
+            {
+                pixel[0] = plte[entry].red;
+                pixel[1] = plte[entry].green;
+                pixel[2] = plte[entry].blue;
+                if(fmt == SPNG_FMT_RGBA8) pixel[3] = plte[entry].alpha;
+
+                continue;
+            }
+            else /* RGBA16 */
+            {
+                r_16 = plte[entry].red;
+                g_16 = plte[entry].green;
+                b_16 = plte[entry].blue;
+                a_16 = plte[entry].alpha;
+
+                r_16 = (r_16 << 8) | r_16;
+                g_16 = (g_16 << 8) | g_16;
+                b_16 = (b_16 << 8) | b_16;
+                a_16 = (a_16 << 8) | a_16;
+
+                memcpy(pixel, &r_16, 2);
+                memcpy(pixel + 2, &g_16, 2);
+                memcpy(pixel + 4, &b_16, 2);
+                memcpy(pixel + 6, &a_16, 2);
+
+                continue;
+            }
+        }
+        else if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA)
+        {
+            if(ihdr->bit_depth == 16)
+            {
+                memcpy(&r_16, scanline + (k * 8), 2);
+                memcpy(&g_16, scanline + (k * 8) + 2, 2);
+                memcpy(&b_16, scanline + (k * 8) + 4, 2);
+                memcpy(&a_16, scanline + (k * 8) + 6, 2);
+            }
+            else /* == 8 */
+            {
+                r_8 = scanline[k * 4];
+                g_8 = scanline[k * 4 + 1];
+                b_8 = scanline[k * 4 + 2];
+                a_8 = scanline[k * 4 + 3];
+            }
+        }
+        else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE)
+        {
+            if(ihdr->bit_depth == 16)
+            {
+                memcpy(&gray_16, scanline + k * 2, 2);
+
+                if(f.apply_trns && ctx->trns.gray == gray_16) a_16 = 0;
+                else a_16 = 65535;
+
+                r_16 = gray_16;
+                g_16 = gray_16;
+                b_16 = gray_16;
+            }
+            else /* <= 8 */
+            {
+                gray_8 = get_sample(&iter);
+
+                if(f.apply_trns && ctx->trns.gray == gray_8) a_8 = 0;
+                else a_8 = 255;
+
+                r_8 = gray_8; g_8 = gray_8; b_8 = gray_8;
+            }
+        }
+        else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA)
+        {
+            if(ihdr->bit_depth == 16)
+            {
+                memcpy(&gray_16, scanline + (k * 4), 2);
+                memcpy(&a_16, scanline + (k * 4) + 2, 2);
+
+                r_16 = gray_16;
+                g_16 = gray_16;
+                b_16 = gray_16;
+            }
+            else /* == 8 */
+            {
+                gray_8 = scanline[k * 2];
+                a_8 = scanline[k * 2 + 1];
+
+                r_8 = gray_8;
+                g_8 = gray_8;
+                b_8 = gray_8;
+            }
+        }
+
+
+        if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
+        {
+            if(ihdr->bit_depth == 16)
+            {
+                r_8 = r_16 >> 8;
+                g_8 = g_16 >> 8;
+                b_8 = b_16 >> 8;
+                a_8 = a_16 >> 8;
+            }
+
+            pixel[0] = r_8;
+            pixel[1] = g_8;
+            pixel[2] = b_8;
+
+            if(fmt == SPNG_FMT_RGBA8) pixel[3] = a_8;
+        }
+        else if(fmt == SPNG_FMT_RGBA16)
+        {
+            if(ihdr->bit_depth != 16)
+            {
+                r_16 = r_8;
+                g_16 = g_8;
+                b_16 = b_8;
+                a_16 = a_8;
+            }
+
+            memcpy(pixel, &r_16, 2);
+            memcpy(pixel + 2, &g_16, 2);
+            memcpy(pixel + 4, &b_16, 2);
+            memcpy(pixel + 6, &a_16, 2);
+        }
+    }/* for(k=0; k < width; k++) */
+
+    if(f.apply_trns) trns_row(out, scanline, trns_px, ctx->bytes_per_pixel, &ctx->ihdr, width, fmt);
+
+    if(f.do_scaling) scale_row(out, width, fmt, processing_depth, sb);
+
+    if(f.apply_gamma) gamma_correct_row(out, width, fmt, gamma_lut);
+
+    /* The previous scanline is always defiltered */
+    void *t = ctx->prev_scanline;
+    ctx->prev_scanline = ctx->scanline;
+    ctx->scanline = t;
+
+    ret = update_row_info(ctx);
+
+    if(ret == SPNG_EOI)
+    {
+        if(ctx->cur_chunk_bytes_left) /* zlib stream ended before an IDAT chunk boundary */
+        {/* Discard the rest of the chunk */
+            int error = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left);
+            if(error) return decode_err(ctx, error);
+        }
+
+        ctx->last_idat = ctx->current_chunk;
+    }
+
+    return ret;
+}
+
+int spng_decode_row(spng_ctx *ctx, void *out, size_t len)
+{
+    if(ctx == NULL || out == NULL) return 1;
+    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
+    if(len < ctx->image_width) return SPNG_EBUFSIZ;
+
+    const struct spng_ihdr *ihdr = &ctx->ihdr;
+    int ret, pass = ctx->row_info.pass;
+    unsigned char *outptr = out;
+
+    if(!ihdr->interlace_method || pass == 6) return spng_decode_scanline(ctx, out, len);
+
+    ret = spng_decode_scanline(ctx, ctx->row, ctx->image_width);
+    if(ret && ret != SPNG_EOI) return ret;
+
+    uint32_t k;
+    unsigned pixel_size = 4; /* RGBA8 */
+    if(ctx->fmt == SPNG_FMT_RGBA16) pixel_size = 8;
+    else if(ctx->fmt == SPNG_FMT_RGB8) pixel_size = 3;
+    else if(ctx->fmt == SPNG_FMT_G8) pixel_size = 1;
+    else if(ctx->fmt == SPNG_FMT_GA8) pixel_size = 2;
+    else if(ctx->fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW))
+    {
+        if(ihdr->bit_depth < 8)
+        {
+            struct spng__iter iter = spng__iter_init(ihdr->bit_depth, ctx->row);
+            const uint8_t samples_per_byte = 8 / ihdr->bit_depth;
+            uint8_t sample;
+
+            for(k=0; k < ctx->subimage[pass].width; k++)
+            {
+                sample = get_sample(&iter);
+
+                size_t ioffset = adam7_x_start[pass] + k * adam7_x_delta[pass];
+
+                sample = sample << (iter.initial_shift - ioffset * ihdr->bit_depth % 8);
+
+                ioffset /= samples_per_byte;
+
+                outptr[ioffset] |= sample;
+            }
+
+            return 0;
+        }
+        else pixel_size = ctx->bytes_per_pixel;
+    }
+
+    for(k=0; k < ctx->subimage[pass].width; k++)
+    {
+        size_t ioffset = (adam7_x_start[pass] + (size_t) k * adam7_x_delta[pass]) * pixel_size;
+
+        memcpy(outptr + ioffset, ctx->row + k * pixel_size, pixel_size);
+    }
+
+    return 0;
+}
+
+int spng_decode_chunks(spng_ctx *ctx)
+{
+    if(ctx == NULL) return 1;
+    if(ctx->encode_only) return SPNG_ECTXTYPE;
+    if(ctx->state < SPNG_STATE_INPUT) return SPNG_ENOSRC;
+    if(ctx->state == SPNG_STATE_IEND) return 0;
+
+    return read_chunks(ctx, 0);
+}
+
+int spng_decode_image(spng_ctx *ctx, void *out, size_t len, int fmt, int flags)
+{
+    if(ctx == NULL) return 1;
+    if(ctx->encode_only) return SPNG_ECTXTYPE;
+    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
+
+    const struct spng_ihdr *ihdr = &ctx->ihdr;
+
+    int ret = read_chunks(ctx, 0);
+    if(ret) return decode_err(ctx, ret);
+
+    ret = check_decode_fmt(ihdr, fmt);
+    if(ret) return ret;
+
+    ret = calculate_image_width(ihdr, fmt, &ctx->image_width);
+    if(ret) return decode_err(ctx, ret);
+
+    if(ctx->image_width > SIZE_MAX / ihdr->height) ctx->image_size = 0; /* overflow */
+    else ctx->image_size = ctx->image_width * ihdr->height;
+
+    if( !(flags & SPNG_DECODE_PROGRESSIVE) )
+    {
+        if(out == NULL) return 1;
+        if(!ctx->image_size) return SPNG_EOVERFLOW;
+        if(len < ctx->image_size) return SPNG_EBUFSIZ;
+    }
+
+    uint32_t bytes_read = 0;
+
+    ret = read_idat_bytes(ctx, &bytes_read);
+    if(ret) return decode_err(ctx, ret);
+
+    if(bytes_read > 1)
+    {
+        int valid = read_u16(ctx->data) % 31 ? 0 : 1;
+
+        unsigned flg = ctx->data[1];
+        unsigned flevel = flg >> 6;
+        int compression_level = Z_DEFAULT_COMPRESSION;
+
+        if(flevel == 0) compression_level = 0; /* fastest */
+        else if(flevel == 1) compression_level = 1; /* fast */
+        else if(flevel == 2) compression_level = 6; /* default */
+        else if(flevel == 3) compression_level = 9; /* slowest, max compression */
+
+        if(valid) ctx->image_options.compression_level = compression_level;
+    }
+
+    ret = spng__inflate_init(ctx, ctx->image_options.window_bits);
+    if(ret) return decode_err(ctx, ret);
+
+    ctx->zstream.avail_in = bytes_read;
+    ctx->zstream.next_in = ctx->data;
+
+    size_t scanline_buf_size = ctx->subimage[ctx->widest_pass].scanline_width;
+
+    scanline_buf_size += 32;
+
+    if(scanline_buf_size < 32) return SPNG_EOVERFLOW;
+
+    ctx->scanline_buf = spng__malloc(ctx, scanline_buf_size);
+    ctx->prev_scanline_buf = spng__malloc(ctx, scanline_buf_size);
+
+    ctx->scanline = ctx->scanline_buf;
+    ctx->prev_scanline = ctx->prev_scanline_buf;
+
+    struct decode_flags f = {0};
+
+    ctx->fmt = fmt;
+
+    if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED) f.indexed = 1;
+
+    unsigned processing_depth = ihdr->bit_depth;
+
+    if(f.indexed) processing_depth = 8;
+
+    if(ihdr->interlace_method)
+    {
+        f.interlaced = 1;
+        ctx->row_buf = spng__malloc(ctx, ctx->image_width);
+        ctx->row = ctx->row_buf;
+
+        if(ctx->row == NULL) return decode_err(ctx, SPNG_EMEM);
+    }
+
+    if(ctx->scanline == NULL || ctx->prev_scanline == NULL)
+    {
+        return decode_err(ctx, SPNG_EMEM);
+    }
+
+    f.do_scaling = 1;
+    if(f.indexed) f.do_scaling = 0;
+
+    unsigned depth_target = 8; /* FMT_RGBA8, G8 */
+    if(fmt == SPNG_FMT_RGBA16) depth_target = 16;
+
+    if(flags & SPNG_DECODE_TRNS && ctx->stored.trns) f.apply_trns = 1;
+    else flags &= ~SPNG_DECODE_TRNS;
+
+    if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA ||
+       ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA) flags &= ~SPNG_DECODE_TRNS;
+
+    if(flags & SPNG_DECODE_GAMMA && ctx->stored.gama) f.apply_gamma = 1;
+    else flags &= ~SPNG_DECODE_GAMMA;
+
+    if(flags & SPNG_DECODE_USE_SBIT && ctx->stored.sbit) f.use_sbit = 1;
+    else flags &= ~SPNG_DECODE_USE_SBIT;
+
+    if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGBA16))
+    {
+        if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA &&
+           ihdr->bit_depth == depth_target) f.same_layout = 1;
+    }
+    else if(fmt == SPNG_FMT_RGB8)
+    {
+        if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR &&
+           ihdr->bit_depth == depth_target) f.same_layout = 1;
+
+        f.apply_trns = 0; /* not applicable */
+    }
+    else if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW))
+    {
+        f.same_layout = 1;
+        f.do_scaling = 0;
+        f.apply_gamma = 0; /* for now */
+        f.apply_trns = 0;
+    }
+    else if(fmt == SPNG_FMT_G8 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8)
+    {
+        if(ihdr->bit_depth == depth_target) f.same_layout = 1;
+        else if(ihdr->bit_depth < 8) f.unpack = 1;
+
+        f.apply_trns = 0;
+    }
+    else if(fmt == SPNG_FMT_GA8 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8)
+    {
+        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA &&
+           ihdr->bit_depth == depth_target) f.same_layout = 1;
+        else if(ihdr->bit_depth <= 8) f.unpack = 1;
+    }
+    else if(fmt == SPNG_FMT_GA16 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth == 16)
+    {
+        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA &&
+           ihdr->bit_depth == depth_target) f.same_layout = 1;
+        else if(ihdr->bit_depth == 16) f.unpack = 1;
+    }
+
+    /*if(f.same_layout && !flags && !f.interlaced) f.zerocopy = 1;*/
+
+    uint16_t *gamma_lut = NULL;
+
+    if(f.apply_gamma)
+    {
+        float file_gamma = (float)ctx->gama / 100000.0f;
+        float max;
+
+        unsigned lut_entries;
+
+        if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
+        {
+            lut_entries = 256;
+            max = 255.0f;
+
+            gamma_lut = ctx->gamma_lut8;
+            ctx->gamma_lut = ctx->gamma_lut8;
+        }
+        else /* SPNG_FMT_RGBA16 */
+        {
+            lut_entries = 65536;
+            max = 65535.0f;
+
+            ctx->gamma_lut16 = spng__malloc(ctx, lut_entries * sizeof(uint16_t));
+            if(ctx->gamma_lut16 == NULL) return decode_err(ctx, SPNG_EMEM);
+
+            gamma_lut = ctx->gamma_lut16;
+            ctx->gamma_lut = ctx->gamma_lut16;
+        }
+
+        float screen_gamma = 2.2f;
+        float exponent = file_gamma * screen_gamma;
+
+        if(FP_ZERO == fpclassify(exponent)) return decode_err(ctx, SPNG_EGAMA);
+
+        exponent = 1.0f / exponent;
+
+        unsigned i;
+        for(i=0; i < lut_entries; i++)
+        {
+            float c = pow((float)i / max, exponent) * max;
+            if(c > max) c = max;
+
+            gamma_lut[i] = (uint16_t)c;
+        }
+    }
+
+    struct spng_sbit *sb = &ctx->decode_sb;
+
+    sb->red_bits = processing_depth;
+    sb->green_bits = processing_depth;
+    sb->blue_bits = processing_depth;
+    sb->alpha_bits = processing_depth;
+    sb->grayscale_bits = processing_depth;
+
+    if(f.use_sbit)
+    {
+        if(ihdr->color_type == 0)
+        {
+            sb->grayscale_bits = ctx->sbit.grayscale_bits;
+            sb->alpha_bits = ihdr->bit_depth;
+        }
+        else if(ihdr->color_type == 2 || ihdr->color_type == 3)
+        {
+            sb->red_bits = ctx->sbit.red_bits;
+            sb->green_bits = ctx->sbit.green_bits;
+            sb->blue_bits = ctx->sbit.blue_bits;
+            sb->alpha_bits = ihdr->bit_depth;
+        }
+        else if(ihdr->color_type == 4)
+        {
+            sb->grayscale_bits = ctx->sbit.grayscale_bits;
+            sb->alpha_bits = ctx->sbit.alpha_bits;
+        }
+        else /* == 6 */
+        {
+            sb->red_bits = ctx->sbit.red_bits;
+            sb->green_bits = ctx->sbit.green_bits;
+            sb->blue_bits = ctx->sbit.blue_bits;
+            sb->alpha_bits = ctx->sbit.alpha_bits;
+        }
+    }
+
+    if(ihdr->bit_depth == 16 && fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
+    {/* samples are scaled down by 8 bits in the decode loop */
+        sb->red_bits -= 8;
+        sb->green_bits -= 8;
+        sb->blue_bits -= 8;
+        sb->alpha_bits -= 8;
+        sb->grayscale_bits -= 8;
+
+        processing_depth = 8;
+    }
+
+    /* Prevent infinite loops in sample_to_target() */
+    if(!depth_target || depth_target > 16 ||
+       !processing_depth || processing_depth > 16 ||
+       !sb->grayscale_bits || sb->grayscale_bits > processing_depth ||
+       !sb->alpha_bits || sb->alpha_bits > processing_depth ||
+       !sb->red_bits || sb->red_bits > processing_depth ||
+       !sb->green_bits || sb->green_bits > processing_depth ||
+       !sb->blue_bits || sb->blue_bits > processing_depth)
+    {
+        return decode_err(ctx, SPNG_ESBIT);
+    }
+
+    if(sb->red_bits == sb->green_bits &&
+       sb->green_bits == sb->blue_bits &&
+       sb->blue_bits == sb->alpha_bits &&
+       sb->alpha_bits == processing_depth &&
+       processing_depth == depth_target) f.do_scaling = 0;
+
+    struct spng_plte_entry *plte = ctx->decode_plte.rgba;
+
+    /* Pre-process palette entries */
+    if(f.indexed)
+    {
+        uint8_t red, green, blue, alpha;
+
+        uint32_t i;
+        for(i=0; i < 256; i++)
+        {
+            if(f.apply_trns && i < ctx->trns.n_type3_entries)
+                ctx->plte.entries[i].alpha = ctx->trns.type3_alpha[i];
+            else
+                ctx->plte.entries[i].alpha = 255;
+
+            red   = sample_to_target(ctx->plte.entries[i].red, 8, sb->red_bits, 8);
+            green = sample_to_target(ctx->plte.entries[i].green, 8, sb->green_bits, 8);
+            blue  = sample_to_target(ctx->plte.entries[i].blue, 8, sb->blue_bits, 8);
+            alpha = sample_to_target(ctx->plte.entries[i].alpha, 8, sb->alpha_bits, 8);
+
+#if defined(SPNG_ARM)
+            if(fmt == SPNG_FMT_RGB8 && ihdr->bit_depth == 8)
+            {/* Working with 3 bytes at a time is more of an ARM thing */
+                ctx->decode_plte.rgb[i * 3 + 0] = red;
+                ctx->decode_plte.rgb[i * 3 + 1] = green;
+                ctx->decode_plte.rgb[i * 3 + 2] = blue;
+                continue;
+            }
+#endif
+            plte[i].red = red;
+            plte[i].green = green;
+            plte[i].blue = blue;
+            plte[i].alpha = alpha;
+        }
+
+        f.apply_trns = 0;
+    }
+
+    unsigned char *trns_px = ctx->trns_px;
+
+    if(f.apply_trns)
+    {
+        uint16_t mask = ~0;
+        if(ctx->ihdr.bit_depth < 16) mask = (1 << ctx->ihdr.bit_depth) - 1;
+
+        if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGBA16))
+        {
+            if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR)
+            {
+                if(ihdr->bit_depth == 16)
+                {
+                    memcpy(trns_px, &ctx->trns.red, 2);
+                    memcpy(trns_px + 2, &ctx->trns.green, 2);
+                    memcpy(trns_px + 4, &ctx->trns.blue, 2);
+                }
+                else
+                {
+                    trns_px[0] = ctx->trns.red & mask;
+                    trns_px[1] = ctx->trns.green & mask;
+                    trns_px[2] = ctx->trns.blue & mask;
+                }
+            }
+        }
+        else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) // fmt == SPNG_FMT_GA8 &&
+        {
+            if(ihdr->bit_depth == 16)
+            {
+                memcpy(trns_px, &ctx->trns.gray, 2);
+            }
+            else
+            {
+                trns_px[0] = ctx->trns.gray & mask;
+            }
+        }
+    }
+
+    ctx->decode_flags = f;
+
+    ctx->state = SPNG_STATE_DECODE_INIT;
+
+    struct spng_row_info *ri = &ctx->row_info;
+    struct spng_subimage *sub = ctx->subimage;
+
+    while(!sub[ri->pass].width || !sub[ri->pass].height) ri->pass++;
+
+    if(f.interlaced) ri->row_num = adam7_y_start[ri->pass];
+
+    unsigned pixel_size = 4; /* SPNG_FMT_RGBA8 */
+
+    if(fmt == SPNG_FMT_RGBA16) pixel_size = 8;
+    else if(fmt == SPNG_FMT_RGB8) pixel_size = 3;
+    else if(fmt == SPNG_FMT_G8) pixel_size = 1;
+    else if(fmt == SPNG_FMT_GA8) pixel_size = 2;
+
+    int i;
+    for(i=ri->pass; i <= ctx->last_pass; i++)
+    {
+        if(!sub[i].scanline_width) continue;
+
+        if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) sub[i].out_width = sub[i].scanline_width - 1;
+        else sub[i].out_width = (size_t)sub[i].width * pixel_size;
+
+        if(sub[i].out_width > UINT32_MAX) return decode_err(ctx, SPNG_EOVERFLOW);
+    }
+
+    /* Read the first filter byte, offsetting all reads by 1 byte.
+    The scanlines will be aligned with the start of the array with
+    the next scanline's filter byte at the end,
+    the last scanline will end up being 1 byte "shorter". */
+    ret = read_scanline_bytes(ctx, &ri->filter, 1);
+    if(ret) return decode_err(ctx, ret);
+
+    if(ri->filter > 4) return decode_err(ctx, SPNG_EFILTER);
+
+    if(flags & SPNG_DECODE_PROGRESSIVE)
+    {
+        return 0;
+    }
+
+    do
+    {
+        size_t ioffset = ri->row_num * ctx->image_width;
+
+        ret = spng_decode_row(ctx, (unsigned char*)out + ioffset, ctx->image_width);
+    }while(!ret);
+
+    if(ret != SPNG_EOI) return decode_err(ctx, ret);
+
+    return 0;
+}
+
+int spng_get_row_info(spng_ctx *ctx, struct spng_row_info *row_info)
+{
+    if(ctx == NULL || row_info == NULL || ctx->state < SPNG_STATE_DECODE_INIT) return 1;
+
+    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
+
+    *row_info = ctx->row_info;
+
+    return 0;
+}
+
+static int write_chunks_before_idat(spng_ctx *ctx)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+    if(!ctx->encode_only) return SPNG_EINTERNAL;
+    if(!ctx->stored.ihdr) return SPNG_EINTERNAL;
+
+    int ret;
+    uint32_t i;
+    size_t length;
+    const struct spng_ihdr *ihdr = &ctx->ihdr;
+    unsigned char *data = ctx->decode_plte.raw;
+
+    ret = write_data(ctx, spng_signature, 8);
+    if(ret) return ret;
+
+    write_u32(data,     ihdr->width);
+    write_u32(data + 4, ihdr->height);
+    data[8]  = ihdr->bit_depth;
+    data[9]  = ihdr->color_type;
+    data[10] = ihdr->compression_method;
+    data[11] = ihdr->filter_method;
+    data[12] = ihdr->interlace_method;
+
+    ret = write_chunk(ctx, type_ihdr, data, 13);
+    if(ret) return ret;
+
+    if(ctx->stored.chrm)
+    {
+        write_u32(data,      ctx->chrm_int.white_point_x);
+        write_u32(data + 4,  ctx->chrm_int.white_point_y);
+        write_u32(data + 8,  ctx->chrm_int.red_x);
+        write_u32(data + 12, ctx->chrm_int.red_y);
+        write_u32(data + 16, ctx->chrm_int.green_x);
+        write_u32(data + 20, ctx->chrm_int.green_y);
+        write_u32(data + 24, ctx->chrm_int.blue_x);
+        write_u32(data + 28, ctx->chrm_int.blue_y);
+
+        ret = write_chunk(ctx, type_chrm, data, 32);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.gama)
+    {
+        write_u32(data, ctx->gama);
+
+        ret = write_chunk(ctx, type_gama, data, 4);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.iccp)
+    {
+        uLongf dest_len = compressBound((uLong)ctx->iccp.profile_len);
+
+        Bytef *buf = spng__malloc(ctx, dest_len);
+        if(buf == NULL) return SPNG_EMEM;
+
+        ret = compress2(buf, &dest_len, (void*)ctx->iccp.profile, (uLong)ctx->iccp.profile_len, Z_DEFAULT_COMPRESSION);
+
+        if(ret != Z_OK)
+        {
+            spng__free(ctx, buf);
+            return SPNG_EZLIB;
+        }
+
+        size_t name_len = strlen(ctx->iccp.profile_name);
+
+        length = name_len + 2;
+        length += dest_len;
+
+        if(dest_len > length) return SPNG_EOVERFLOW;
+
+        unsigned char *cdata = NULL;
+
+        ret = write_header(ctx, type_iccp, length, &cdata);
+
+        if(ret)
+        {
+            spng__free(ctx, buf);
+            return ret;
+        }
+
+        memcpy(cdata, ctx->iccp.profile_name, name_len + 1);
+        cdata[name_len + 1] = 0; /* compression method */
+        memcpy(cdata + name_len + 2, buf, dest_len);
+
+        spng__free(ctx, buf);
+
+        ret = finish_chunk(ctx);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.sbit)
+    {
+        switch(ctx->ihdr.color_type)
+        {
+            case SPNG_COLOR_TYPE_GRAYSCALE:
+            {
+                length = 1;
+
+                data[0] = ctx->sbit.grayscale_bits;
+
+                break;
+            }
+            case SPNG_COLOR_TYPE_TRUECOLOR:
+            case SPNG_COLOR_TYPE_INDEXED:
+            {
+                length = 3;
+
+                data[0] = ctx->sbit.red_bits;
+                data[1] = ctx->sbit.green_bits;
+                data[2] = ctx->sbit.blue_bits;
+
+                break;
+            }
+            case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA:
+            {
+                length = 2;
+
+                data[0] = ctx->sbit.grayscale_bits;
+                data[1] = ctx->sbit.alpha_bits;
+
+                break;
+            }
+            case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA:
+            {
+                length = 4;
+
+                data[0] = ctx->sbit.red_bits;
+                data[1] = ctx->sbit.green_bits;
+                data[2] = ctx->sbit.blue_bits;
+                data[3] = ctx->sbit.alpha_bits;
+
+                break;
+            }
+            default: return SPNG_EINTERNAL;
+        }
+
+        ret = write_chunk(ctx, type_sbit, data, length);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.srgb)
+    {
+        ret = write_chunk(ctx, type_srgb, &ctx->srgb_rendering_intent, 1);
+        if(ret) return ret;
+    }
+
+    ret = write_unknown_chunks(ctx, SPNG_AFTER_IHDR);
+    if(ret) return ret;
+
+    if(ctx->stored.plte)
+    {
+        for(i=0; i < ctx->plte.n_entries; i++)
+        {
+            data[i * 3 + 0] = ctx->plte.entries[i].red;
+            data[i * 3 + 1] = ctx->plte.entries[i].green;
+            data[i * 3 + 2] = ctx->plte.entries[i].blue;
+        }
+
+        ret = write_chunk(ctx, type_plte, data, ctx->plte.n_entries * 3);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.bkgd)
+    {
+        switch(ctx->ihdr.color_type)
+        {
+            case SPNG_COLOR_TYPE_GRAYSCALE:
+            case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA:
+            {
+                length = 2;
+
+                write_u16(data, ctx->bkgd.gray);
+
+                break;
+            }
+            case SPNG_COLOR_TYPE_TRUECOLOR:
+            case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA:
+            {
+                length = 6;
+
+                write_u16(data,     ctx->bkgd.red);
+                write_u16(data + 2, ctx->bkgd.green);
+                write_u16(data + 4, ctx->bkgd.blue);
+
+                break;
+            }
+            case SPNG_COLOR_TYPE_INDEXED:
+            {
+                length = 1;
+
+                data[0] = ctx->bkgd.plte_index;
+
+                break;
+            }
+            default: return SPNG_EINTERNAL;
+        }
+
+        ret = write_chunk(ctx, type_bkgd, data, length);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.hist)
+    {
+        length = ctx->plte.n_entries * 2;
+
+        for(i=0; i < ctx->plte.n_entries; i++)
+        {
+            write_u16(data + i * 2, ctx->hist.frequency[i]);
+        }
+
+        ret = write_chunk(ctx, type_hist, data, length);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.trns)
+    {
+        if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_GRAYSCALE)
+        {
+            write_u16(data, ctx->trns.gray);
+
+            ret = write_chunk(ctx, type_trns, data, 2);
+        }
+        else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_TRUECOLOR)
+        {
+            write_u16(data,     ctx->trns.red);
+            write_u16(data + 2, ctx->trns.green);
+            write_u16(data + 4, ctx->trns.blue);
+
+            ret = write_chunk(ctx, type_trns, data, 6);
+        }
+        else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_INDEXED)
+        {
+            ret = write_chunk(ctx, type_trns, ctx->trns.type3_alpha, ctx->trns.n_type3_entries);
+        }
+
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.phys)
+    {
+        write_u32(data,     ctx->phys.ppu_x);
+        write_u32(data + 4, ctx->phys.ppu_y);
+        data[8] = ctx->phys.unit_specifier;
+
+        ret = write_chunk(ctx, type_phys, data, 9);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.splt)
+    {
+        const struct spng_splt *splt;
+        unsigned char *cdata = NULL;
+
+        uint32_t k;
+        for(i=0; i < ctx->n_splt; i++)
+        {
+            splt = &ctx->splt_list[i];
+
+            size_t name_len = strlen(splt->name);
+            length = name_len + 1;
+
+            if(splt->sample_depth == 8) length += splt->n_entries * 6 + 1;
+            else if(splt->sample_depth == 16) length += splt->n_entries * 10 + 1;
+
+            ret = write_header(ctx, type_splt, length, &cdata);
+            if(ret) return ret;
+
+            memcpy(cdata, splt->name, name_len + 1);
+            cdata += name_len + 2;
+            cdata[-1] = splt->sample_depth;
+
+            if(splt->sample_depth == 8)
+            {
+                for(k=0; k < splt->n_entries; k++)
+                {
+                    cdata[k * 6 + 0] = splt->entries[k].red;
+                    cdata[k * 6 + 1] = splt->entries[k].green;
+                    cdata[k * 6 + 2] = splt->entries[k].blue;
+                    cdata[k * 6 + 3] = splt->entries[k].alpha;
+                    write_u16(cdata + k * 6 + 4, splt->entries[k].frequency);
+                }
+            }
+            else if(splt->sample_depth == 16)
+            {
+                for(k=0; k < splt->n_entries; k++)
+                {
+                    write_u16(cdata + k * 10 + 0, splt->entries[k].red);
+                    write_u16(cdata + k * 10 + 2, splt->entries[k].green);
+                    write_u16(cdata + k * 10 + 4, splt->entries[k].blue);
+                    write_u16(cdata + k * 10 + 6, splt->entries[k].alpha);
+                    write_u16(cdata + k * 10 + 8, splt->entries[k].frequency);
+                }
+            }
+
+            ret = finish_chunk(ctx);
+            if(ret) return ret;
+        }
+    }
+
+    if(ctx->stored.time)
+    {
+        write_u16(data, ctx->time.year);
+        data[2] = ctx->time.month;
+        data[3] = ctx->time.day;
+        data[4] = ctx->time.hour;
+        data[5] = ctx->time.minute;
+        data[6] = ctx->time.second;
+
+        ret = write_chunk(ctx, type_time, data, 7);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.text)
+    {
+        unsigned char *cdata = NULL;
+        const struct spng_text2 *text;
+        const uint8_t *text_type_array[4] = { 0, type_text, type_ztxt, type_itxt };
+
+        for(i=0; i < ctx->n_text; i++)
+        {
+            text = &ctx->text_list[i];
+
+            const uint8_t *text_chunk_type = text_type_array[text->type];
+            Bytef *compressed_text = NULL;
+            size_t keyword_len = 0;
+            size_t language_tag_len = 0;
+            size_t translated_keyword_len = 0;
+            size_t compressed_length = 0;
+            size_t text_length = 0;
+
+            keyword_len = strlen(text->keyword);
+            text_length = strlen(text->text);
+
+            length = keyword_len + 1;
+
+            if(text->type == SPNG_ZTXT)
+            {
+                length += 1; /* compression method */
+            }
+            else if(text->type == SPNG_ITXT)
+            {
+                if(!text->language_tag || !text->translated_keyword) return SPNG_EINTERNAL;
+
+                language_tag_len = strlen(text->language_tag);
+                translated_keyword_len = strlen(text->translated_keyword);
+
+                length += language_tag_len;
+                if(length < language_tag_len) return SPNG_EOVERFLOW;
+
+                length += translated_keyword_len;
+                if(length < translated_keyword_len) return SPNG_EOVERFLOW;
+
+                length += 4; /* compression flag + method + nul for the two strings */
+                if(length < 4) return SPNG_EOVERFLOW;
+            }
+
+            if(text->compression_flag)
+            {
+                ret = spng__deflate_init(ctx, &ctx->text_options);
+                if(ret) return ret;
+
+                z_stream *zstream = &ctx->zstream;
+                uLongf dest_len = deflateBound(zstream, (uLong)text_length);
+
+                compressed_text = spng__malloc(ctx, dest_len);
+
+                if(compressed_text == NULL) return SPNG_EMEM;
+
+                zstream->next_in = (void*)text->text;
+                zstream->avail_in = (uInt)text_length;
+
+                zstream->next_out = compressed_text;
+                zstream->avail_out = dest_len;
+
+                ret = deflate(zstream, Z_FINISH);
+
+                if(ret != Z_STREAM_END)
+                {
+                    spng__free(ctx, compressed_text);
+                    return SPNG_EZLIB;
+                }
+
+                compressed_length = zstream->total_out;
+
+                length += compressed_length;
+                if(length < compressed_length) return SPNG_EOVERFLOW;
+            }
+            else
+            {
+                text_length = strlen(text->text);
+
+                length += text_length;
+                if(length < text_length) return SPNG_EOVERFLOW;
+            }
+
+            ret = write_header(ctx, text_chunk_type, length, &cdata);
+            if(ret)
+            {
+                spng__free(ctx, compressed_text);
+                return ret;
+            }
+
+            memcpy(cdata, text->keyword, keyword_len + 1);
+            cdata += keyword_len + 1;
+
+            if(text->type == SPNG_ITXT)
+            {
+                cdata[0] = text->compression_flag;
+                cdata[1] = 0; /* compression method */
+                cdata += 2;
+
+                memcpy(cdata, text->language_tag, language_tag_len + 1);
+                cdata += language_tag_len + 1;
+
+                memcpy(cdata, text->translated_keyword, translated_keyword_len + 1);
+                cdata += translated_keyword_len + 1;
+            }
+            else if(text->type == SPNG_ZTXT)
+            {
+                cdata[0] = 0; /* compression method */
+                cdata++;
+            }
+
+            if(text->compression_flag) memcpy(cdata, compressed_text, compressed_length);
+            else memcpy(cdata, text->text, text_length);
+
+            spng__free(ctx, compressed_text);
+
+            ret = finish_chunk(ctx);
+            if(ret) return ret;
+        }
+    }
+
+    if(ctx->stored.offs)
+    {
+        write_s32(data,     ctx->offs.x);
+        write_s32(data + 4, ctx->offs.y);
+        data[8] = ctx->offs.unit_specifier;
+
+        ret = write_chunk(ctx, type_offs, data, 9);
+        if(ret) return ret;
+    }
+
+    if(ctx->stored.exif)
+    {
+        ret = write_chunk(ctx, type_exif, ctx->exif.data, ctx->exif.length);
+        if(ret) return ret;
+    }
+
+    ret = write_unknown_chunks(ctx, SPNG_AFTER_PLTE);
+    if(ret) return ret;
+
+    return 0;
+}
+
+static int write_chunks_after_idat(spng_ctx *ctx)
+{
+    if(ctx == NULL) return SPNG_EINTERNAL;
+
+    int ret = write_unknown_chunks(ctx, SPNG_AFTER_IDAT);
+    if(ret) return ret;
+
+    return write_iend(ctx);
+}
+
+/* Compress and write scanline to IDAT stream */
+static int write_idat_bytes(spng_ctx *ctx, const void *scanline, size_t len, int flush)
+{
+    if(ctx == NULL || scanline == NULL) return SPNG_EINTERNAL;
+    if(len > UINT_MAX) return SPNG_EINTERNAL;
+
+    int ret = 0;
+    unsigned char *data = NULL;
+    z_stream *zstream = &ctx->zstream;
+    uint32_t idat_length = SPNG_WRITE_SIZE;
+
+    zstream->next_in = scanline;
+    zstream->avail_in = (uInt)len;
+
+    do
+    {
+        ret = deflate(zstream, flush);
+
+        if(zstream->avail_out == 0)
+        {
+            ret = finish_chunk(ctx);
+            if(ret) return encode_err(ctx, ret);
+
+            ret = write_header(ctx, type_idat, idat_length, &data);
+            if(ret) return encode_err(ctx, ret);
+
+            zstream->next_out = data;
+            zstream->avail_out = idat_length;
+        }
+
+    }while(zstream->avail_in);
+
+    if(ret != Z_OK) return SPNG_EZLIB;
+
+    return 0;
+}
+
+static int finish_idat(spng_ctx *ctx)
+{
+    int ret = 0;
+    unsigned char *data = NULL;
+    z_stream *zstream = &ctx->zstream;
+    uint32_t idat_length = SPNG_WRITE_SIZE;
+
+    while(ret != Z_STREAM_END)
+    {
+        ret = deflate(zstream, Z_FINISH);
+
+        if(ret)
+        {
+            if(ret == Z_STREAM_END) break;
+
+            if(ret != Z_BUF_ERROR) return SPNG_EZLIB;
+        }
+
+        if(zstream->avail_out == 0)
+        {
+            ret = finish_chunk(ctx);
+            if(ret) return encode_err(ctx, ret);
+
+            ret = write_header(ctx, type_idat, idat_length, &data);
+            if(ret) return encode_err(ctx, ret);
+
+            zstream->next_out = data;
+            zstream->avail_out = idat_length;
+        }
+    }
+
+    uint32_t trimmed_length = idat_length - zstream->avail_out;
+
+    ret = trim_chunk(ctx, trimmed_length);
+    if(ret) return ret;
+
+    return finish_chunk(ctx);
+}
+
+static int encode_scanline(spng_ctx *ctx, const void *scanline, size_t len)
+{
+    if(ctx == NULL || scanline == NULL) return SPNG_EINTERNAL;
+
+    int ret, pass = ctx->row_info.pass;
+    uint8_t filter = 0;
+    struct spng_row_info *ri = &ctx->row_info;
+    const struct spng_subimage *sub = ctx->subimage;
+    struct encode_flags f = ctx->encode_flags;
+    unsigned char *filtered_scanline = ctx->filtered_scanline;
+    size_t scanline_width = sub[pass].scanline_width;
+
+    if(len < scanline_width - 1) return SPNG_EINTERNAL;
+
+    /* encode_row() interlaces directly to ctx->scanline */
+    if(scanline != ctx->scanline) memcpy(ctx->scanline, scanline, scanline_width - 1);
+
+    if(f.to_bigendian) u16_row_to_bigendian(ctx->scanline, scanline_width - 1);
+    const int requires_previous = f.filter_choice & (SPNG_FILTER_CHOICE_UP | SPNG_FILTER_CHOICE_AVG | SPNG_FILTER_CHOICE_PAETH);
+
+    /* XXX: exclude 'requires_previous' filters by default for first scanline? */
+    if(!ri->scanline_idx && requires_previous)
+    {
+        /* prev_scanline is all zeros for the first scanline */
+        memset(ctx->prev_scanline, 0, scanline_width);
+    }
+
+    filter = get_best_filter(ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, f.filter_choice);
+
+    if(!filter) filtered_scanline = ctx->scanline;
+
+    filtered_scanline[-1] = filter;
+
+    if(filter)
+    {
+        ret = filter_scanline(filtered_scanline, ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, filter);
+        if(ret) return encode_err(ctx, ret);
+    }
+
+    ret = write_idat_bytes(ctx, filtered_scanline - 1, scanline_width, Z_NO_FLUSH);
+    if(ret) return encode_err(ctx, ret);
+
+    /* The previous scanline is always unfiltered */
+    void *t = ctx->prev_scanline;
+    ctx->prev_scanline = ctx->scanline;
+    ctx->scanline = t;
+
+    ret = update_row_info(ctx);
+
+    if(ret == SPNG_EOI)
+    {
+        int error = finish_idat(ctx);
+        if(error) encode_err(ctx, error);
+
+        if(f.finalize)
+        {
+            error = spng_encode_chunks(ctx);
+            if(error) return encode_err(ctx, error);
+        }
+    }
+
+    return ret;
+}
+
+static int encode_row(spng_ctx *ctx, const void *row, size_t len)
+{
+    if(ctx == NULL || row == NULL) return SPNG_EINTERNAL;
+
+    const int pass = ctx->row_info.pass;
+
+    if(!ctx->ihdr.interlace_method || pass == 6) return encode_scanline(ctx, row, len);
+
+    uint32_t k;
+    const unsigned pixel_size = ctx->pixel_size;
+    const unsigned bit_depth = ctx->ihdr.bit_depth;
+
+    if(bit_depth < 8)
+    {
+        const unsigned samples_per_byte = 8 / bit_depth;
+        const uint8_t mask = (1 << bit_depth) - 1;
+        const unsigned initial_shift = 8 - bit_depth;
+        unsigned shift_amount = initial_shift;
+
+        unsigned char *scanline = ctx->scanline;
+        const unsigned char *row_uc = row;
+        uint8_t sample;
+
+        memset(scanline, 0, ctx->subimage[pass].scanline_width);
+
+        for(k=0; k < ctx->subimage[pass].width; k++)
+        {
+            size_t ioffset = adam7_x_start[pass] + k * adam7_x_delta[pass];
+
+            sample = row_uc[ioffset / samples_per_byte];
+
+            sample = sample >> (initial_shift - ioffset * bit_depth % 8);
+            sample = sample & mask;
+            sample = sample << shift_amount;
+
+            scanline[0] |= sample;
+
+            shift_amount -= bit_depth;
+
+            if(shift_amount > 7)
+            {
+                shift_amount = initial_shift;
+                scanline++;
+            }
+        }
+
+        return encode_scanline(ctx, ctx->scanline, len);
+    }
+
+    for(k=0; k < ctx->subimage[pass].width; k++)
+    {
+        size_t ioffset = (adam7_x_start[pass] + (size_t) k * adam7_x_delta[pass]) * pixel_size;
+
+        memcpy(ctx->scanline + k * pixel_size, (unsigned char*)row + ioffset, pixel_size);
+    }
+
+    return encode_scanline(ctx, ctx->scanline, len);
+}
+
+int spng_encode_scanline(spng_ctx *ctx, const void *scanline, size_t len)
+{
+    if(ctx == NULL || scanline == NULL) return SPNG_EINVAL;
+    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
+    if(len < (ctx->subimage[ctx->row_info.pass].scanline_width -1) ) return SPNG_EBUFSIZ;
+
+    return encode_scanline(ctx, scanline, len);
+}
+
+int spng_encode_row(spng_ctx *ctx, const void *row, size_t len)
+{
+    if(ctx == NULL || row == NULL) return SPNG_EINVAL;
+    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
+    if(len < ctx->image_width) return SPNG_EBUFSIZ;
+
+    return encode_row(ctx, row, len);
+}
+
+int spng_encode_chunks(spng_ctx *ctx)
+{
+    if(ctx == NULL) return 1;
+    if(!ctx->state) return SPNG_EBADSTATE;
+    if(ctx->state < SPNG_STATE_OUTPUT) return SPNG_ENODST;
+    if(!ctx->encode_only) return SPNG_ECTXTYPE;
+
+    int ret = 0;
+
+    if(ctx->state < SPNG_STATE_FIRST_IDAT)
+    {
+        if(!ctx->stored.ihdr) return SPNG_ENOIHDR;
+
+        ret = write_chunks_before_idat(ctx);
+        if(ret) return encode_err(ctx, ret);
+
+        ctx->state = SPNG_STATE_FIRST_IDAT;
+    }
+    else if(ctx->state == SPNG_STATE_FIRST_IDAT)
+    {
+        return 0;
+    }
+    else if(ctx->state == SPNG_STATE_EOI)
+    {
+        ret = write_chunks_after_idat(ctx);
+        if(ret) return encode_err(ctx, ret);
+
+        ctx->state = SPNG_STATE_IEND;
+    }
+    else return SPNG_EOPSTATE;
+
+    return 0;
+}
+
+int spng_encode_image(spng_ctx *ctx, const void *img, size_t len, int fmt, int flags)
+{
+    if(ctx == NULL) return 1;
+    if(!ctx->state) return SPNG_EBADSTATE;
+    if(!ctx->encode_only) return SPNG_ECTXTYPE;
+    if(!ctx->stored.ihdr) return SPNG_ENOIHDR;
+    if( !(fmt == SPNG_FMT_PNG || fmt == SPNG_FMT_RAW) ) return SPNG_EFMT;
+
+    int ret = 0;
+    const struct spng_ihdr *ihdr = &ctx->ihdr;
+    struct encode_flags *encode_flags = &ctx->encode_flags;
+
+    if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED && !ctx->stored.plte) return SPNG_ENOPLTE;
+
+    ret = calculate_image_width(ihdr, fmt, &ctx->image_width);
+    if(ret) return encode_err(ctx, ret);
+
+    if(ctx->image_width > SIZE_MAX / ihdr->height) ctx->image_size = 0; /* overflow */
+    else ctx->image_size = ctx->image_width * ihdr->height;
+
+    if( !(flags & SPNG_ENCODE_PROGRESSIVE) )
+    {
+        if(img == NULL) return 1;
+        if(!ctx->image_size) return SPNG_EOVERFLOW;
+        if(len != ctx->image_size) return SPNG_EBUFSIZ;
+    }
+
+    ret = spng_encode_chunks(ctx);
+    if(ret) return encode_err(ctx, ret);
+
+    ret = calculate_subimages(ctx);
+    if(ret) return encode_err(ctx, ret);
+
+    if(ihdr->bit_depth < 8) ctx->bytes_per_pixel = 1;
+    else ctx->bytes_per_pixel = num_channels(ihdr) * (ihdr->bit_depth / 8);
+
+    if(spng__optimize(SPNG_FILTER_CHOICE))
+    {
+        /* Filtering would make no difference */
+        if(!ctx->image_options.compression_level)
+        {
+            encode_flags->filter_choice = SPNG_DISABLE_FILTERING;
+        }
+
+        /* Palette indices and low bit-depth images do not benefit from filtering */
+        if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED || ihdr->bit_depth < 8)
+        {
+            encode_flags->filter_choice = SPNG_DISABLE_FILTERING;
+        }
+    }
+
+    /* This is technically the same as disabling filtering */
+    if(encode_flags->filter_choice == SPNG_FILTER_CHOICE_NONE)
+    {
+        encode_flags->filter_choice = SPNG_DISABLE_FILTERING;
+    }
+
+    if(!encode_flags->filter_choice && spng__optimize(SPNG_IMG_COMPRESSION_STRATEGY))
+    {
+        ctx->image_options.strategy = Z_DEFAULT_STRATEGY;
+    }
+
+    ret = spng__deflate_init(ctx, &ctx->image_options);
+    if(ret) return encode_err(ctx, ret);
+
+    size_t scanline_buf_size = ctx->subimage[ctx->widest_pass].scanline_width;
+
+    scanline_buf_size += 32;
+
+    if(scanline_buf_size < 32) return SPNG_EOVERFLOW;
+
+    ctx->scanline_buf = spng__malloc(ctx, scanline_buf_size);
+    ctx->prev_scanline_buf = spng__malloc(ctx, scanline_buf_size);
+
+    if(ctx->scanline_buf == NULL || ctx->prev_scanline_buf == NULL) return encode_err(ctx, SPNG_EMEM);
+
+    /* Maintain alignment for pixels, filter at [-1] */
+    ctx->scanline = ctx->scanline_buf + 16;
+    ctx->prev_scanline = ctx->prev_scanline_buf + 16;
+
+    if(encode_flags->filter_choice)
+    {
+        ctx->filtered_scanline_buf = spng__malloc(ctx, scanline_buf_size);
+        if(ctx->filtered_scanline_buf == NULL) return encode_err(ctx, SPNG_EMEM);
+
+        ctx->filtered_scanline = ctx->filtered_scanline_buf + 16;
+    }
+
+    struct spng_subimage *sub = ctx->subimage;
+    struct spng_row_info *ri = &ctx->row_info;
+
+    ctx->fmt = fmt;
+
+    z_stream *zstream = &ctx->zstream;
+    zstream->avail_out = SPNG_WRITE_SIZE;
+
+    ret = write_header(ctx, type_idat, zstream->avail_out, &zstream->next_out);
+    if(ret) return encode_err(ctx, ret);
+
+    if(ihdr->interlace_method) encode_flags->interlace = 1;
+
+    if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW) ) encode_flags->same_layout = 1;
+
+    if(ihdr->bit_depth == 16 && fmt != SPNG_FMT_RAW) encode_flags->to_bigendian = 1;
+
+    if(flags & SPNG_ENCODE_FINALIZE) encode_flags->finalize = 1;
+
+    while(!sub[ri->pass].width || !sub[ri->pass].height) ri->pass++;
+
+    if(encode_flags->interlace) ri->row_num = adam7_y_start[ri->pass];
+
+    ctx->pixel_size = 4; /* SPNG_FMT_RGBA8 */
+
+    if(fmt == SPNG_FMT_RGBA16) ctx->pixel_size = 8;
+    else if(fmt == SPNG_FMT_RGB8) ctx->pixel_size = 3;
+    else if(fmt == SPNG_FMT_G8) ctx->pixel_size = 1;
+    else if(fmt == SPNG_FMT_GA8) ctx->pixel_size = 2;
+    else if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) ctx->pixel_size = ctx->bytes_per_pixel;
+
+    ctx->state = SPNG_STATE_ENCODE_INIT;
+
+    if(flags & SPNG_ENCODE_PROGRESSIVE)
+    {
+        encode_flags->progressive = 1;
+
+        return 0;
+    }
+
+    do
+    {
+        size_t ioffset = ri->row_num * ctx->image_width;
+
+        ret = encode_row(ctx, (unsigned char*)img + ioffset, ctx->image_width);
+
+    }while(!ret);
+
+    if(ret != SPNG_EOI) return encode_err(ctx, ret);
+
+    return 0;
+}
+
+spng_ctx *spng_ctx_new(int flags)
+{
+    struct spng_alloc alloc =
+    {
+        .malloc_fn = malloc,
+        .realloc_fn = realloc,
+        .calloc_fn = calloc,
+        .free_fn = free
+    };
+
+    return spng_ctx_new2(&alloc, flags);
+}
+
+spng_ctx *spng_ctx_new2(struct spng_alloc *alloc, int flags)
+{
+    if(alloc == NULL) return NULL;
+    if(flags != (flags & SPNG__CTX_FLAGS_ALL)) return NULL;
+
+    if(alloc->malloc_fn == NULL) return NULL;
+    if(alloc->realloc_fn == NULL) return NULL;
+    if(alloc->calloc_fn == NULL) return NULL;
+    if(alloc->free_fn == NULL) return NULL;
+
+    spng_ctx *ctx = alloc->calloc_fn(1, sizeof(spng_ctx));
+    if(ctx == NULL) return NULL;
+
+    ctx->alloc = *alloc;
+
+    ctx->max_width = spng_u32max;
+    ctx->max_height = spng_u32max;
+
+    ctx->max_chunk_size = spng_u32max;
+    ctx->chunk_cache_limit = SIZE_MAX;
+    ctx->chunk_count_limit = SPNG_MAX_CHUNK_COUNT;
+
+    ctx->state = SPNG_STATE_INIT;
+
+    ctx->crc_action_critical = SPNG_CRC_ERROR;
+    ctx->crc_action_ancillary = SPNG_CRC_DISCARD;
+
+    const struct spng__zlib_options image_defaults =
+    {
+        .compression_level = Z_DEFAULT_COMPRESSION,
+        .window_bits = 15,
+        .mem_level = 8,
+        .strategy = Z_FILTERED,
+        .data_type = 0 /* Z_BINARY */
+    };
+
+    const struct spng__zlib_options text_defaults =
+    {
+        .compression_level = Z_DEFAULT_COMPRESSION,
+        .window_bits = 15,
+        .mem_level = 8,
+        .strategy = Z_DEFAULT_STRATEGY,
+        .data_type = 1 /* Z_TEXT */
+    };
+
+    ctx->image_options = image_defaults;
+    ctx->text_options = text_defaults;
+
+    ctx->optimize_option = ~0;
+    ctx->encode_flags.filter_choice = SPNG_FILTER_CHOICE_ALL;
+
+    ctx->flags = flags;
+
+    if(flags & SPNG_CTX_ENCODER) ctx->encode_only = 1;
+
+    return ctx;
+}
+
+void spng_ctx_free(spng_ctx *ctx)
+{
+    if(ctx == NULL) return;
+
+    if(ctx->streaming && ctx->stream_buf != NULL) spng__free(ctx, ctx->stream_buf);
+
+    if(!ctx->user.exif) spng__free(ctx, ctx->exif.data);
+
+    if(!ctx->user.iccp) spng__free(ctx, ctx->iccp.profile);
+
+    uint32_t i;
+
+    if(ctx->splt_list != NULL && !ctx->user.splt)
+    {
+        for(i=0; i < ctx->n_splt; i++)
+        {
+            spng__free(ctx, ctx->splt_list[i].entries);
+        }
+        spng__free(ctx, ctx->splt_list);
+    }
+
+    if(ctx->text_list != NULL)
+    {
+        for(i=0; i< ctx->n_text; i++)
+        {
+            if(ctx->user.text) break;
+
+            spng__free(ctx, ctx->text_list[i].keyword);
+            if(ctx->text_list[i].compression_flag) spng__free(ctx, ctx->text_list[i].text);
+        }
+        spng__free(ctx, ctx->text_list);
+    }
+
+    if(ctx->chunk_list != NULL && !ctx->user.unknown)
+    {
+        for(i=0; i< ctx->n_chunks; i++)
+        {
+            spng__free(ctx, ctx->chunk_list[i].data);
+        }
+        spng__free(ctx, ctx->chunk_list);
+    }
+
+    if(ctx->deflate) deflateEnd(&ctx->zstream);
+    else inflateEnd(&ctx->zstream);
+
+    if(!ctx->user_owns_out_png) spng__free(ctx, ctx->out_png);
+
+    spng__free(ctx, ctx->gamma_lut16);
+
+    spng__free(ctx, ctx->row_buf);
+    spng__free(ctx, ctx->scanline_buf);
+    spng__free(ctx, ctx->prev_scanline_buf);
+    spng__free(ctx, ctx->filtered_scanline_buf);
+
+    spng_free_fn *free_fn = ctx->alloc.free_fn;
+
+    memset(ctx, 0, sizeof(spng_ctx));
+
+    free_fn(ctx);
+}
+
+static int buffer_read_fn(spng_ctx *ctx, void *user, void *data, size_t n)
+{
+    if(n > ctx->bytes_left) return SPNG_IO_EOF;
+
+    (void)user;
+    (void)data;
+    ctx->data = ctx->data + ctx->last_read_size;
+
+    ctx->last_read_size = n;
+    ctx->bytes_left -= n;
+
+    return 0;
+}
+
+static int file_read_fn(spng_ctx *ctx, void *user, void *data, size_t n)
+{
+    FILE *file = user;
+    (void)ctx;
+
+    if(fread(data, n, 1, file) != 1)
+    {
+        if(feof(file)) return SPNG_IO_EOF;
+        else return SPNG_IO_ERROR;
+    }
+
+    return 0;
+}
+
+static int file_write_fn(spng_ctx *ctx, void *user, void *data, size_t n)
+{
+    FILE *file = user;
+    (void)ctx;
+
+    if(fwrite(data, n, 1, file) != 1) return SPNG_IO_ERROR;
+
+    return 0;
+}
+
+int spng_set_png_buffer(spng_ctx *ctx, const void *buf, size_t size)
+{
+    if(ctx == NULL || buf == NULL) return 1;
+    if(!ctx->state) return SPNG_EBADSTATE;
+    if(ctx->encode_only) return SPNG_ECTXTYPE; /* not supported */
+
+    if(ctx->data != NULL) return SPNG_EBUF_SET;
+
+    ctx->data = buf;
+    ctx->png_base = buf;
+    ctx->data_size = size;
+    ctx->bytes_left = size;
+
+    ctx->read_fn = buffer_read_fn;
+
+    ctx->state = SPNG_STATE_INPUT;
+
+    return 0;
+}
+
+int spng_set_png_stream(spng_ctx *ctx, spng_rw_fn *rw_func, void *user)
+{
+    if(ctx == NULL || rw_func == NULL) return 1;
+    if(!ctx->state) return SPNG_EBADSTATE;
+
+    /* SPNG_STATE_OUTPUT shares the same value */
+    if(ctx->state >= SPNG_STATE_INPUT) return SPNG_EBUF_SET;
+
+    if(ctx->encode_only)
+    {
+        if(ctx->out_png != NULL) return SPNG_EBUF_SET;
+
+        ctx->write_fn = rw_func;
+        ctx->write_ptr = ctx->stream_buf;
+
+        ctx->state = SPNG_STATE_OUTPUT;
+    }
+    else
+    {
+        ctx->stream_buf = spng__malloc(ctx, SPNG_READ_SIZE);
+        if(ctx->stream_buf == NULL) return SPNG_EMEM;
+
+        ctx->read_fn = rw_func;
+        ctx->data = ctx->stream_buf;
+        ctx->data_size = SPNG_READ_SIZE;
+
+        ctx->state = SPNG_STATE_INPUT;
+    }
+
+    ctx->stream_user_ptr = user;
+
+    ctx->streaming = 1;
+
+    return 0;
+}
+
+int spng_set_png_file(spng_ctx *ctx, FILE *file)
+{
+    if(file == NULL) return 1;
+
+    if(ctx->encode_only) return spng_set_png_stream(ctx, file_write_fn, file);
+
+    return spng_set_png_stream(ctx, file_read_fn, file);
+}
+
+void *spng_get_png_buffer(spng_ctx *ctx, size_t *len, int *error)
+{
+    int tmp = 0;
+    error = error ? error : &tmp;
+    *error = 0;
+
+    if(ctx == NULL || !len) *error = SPNG_EINVAL;
+
+    if(*error) return NULL;
+
+    if(!ctx->encode_only) *error = SPNG_ECTXTYPE;
+    else if(!ctx->state) *error = SPNG_EBADSTATE;
+    else if(!ctx->internal_buffer) *error = SPNG_EOPSTATE;
+    else if(ctx->state < SPNG_STATE_EOI) *error = SPNG_EOPSTATE;
+    else if(ctx->state != SPNG_STATE_IEND) *error = SPNG_ENOTFINAL;
+
+    if(*error) return NULL;
+
+    ctx->user_owns_out_png = 1;
+
+    *len = ctx->bytes_encoded;
+
+    return ctx->out_png;
+}
+
+int spng_set_image_limits(spng_ctx *ctx, uint32_t width, uint32_t height)
+{
+    if(ctx == NULL) return 1;
+
+    if(width > spng_u32max || height > spng_u32max) return 1;
+
+    ctx->max_width = width;
+    ctx->max_height = height;
+
+    return 0;
+}
+
+int spng_get_image_limits(spng_ctx *ctx, uint32_t *width, uint32_t *height)
+{
+    if(ctx == NULL || width == NULL || height == NULL) return 1;
+
+    *width = ctx->max_width;
+    *height = ctx->max_height;
+
+    return 0;
+}
+
+int spng_set_chunk_limits(spng_ctx *ctx, size_t chunk_size, size_t cache_limit)
+{
+    if(ctx == NULL || chunk_size > spng_u32max || chunk_size > cache_limit) return 1;
+
+    ctx->max_chunk_size = chunk_size;
+
+    ctx->chunk_cache_limit = cache_limit;
+
+    return 0;
+}
+
+int spng_get_chunk_limits(spng_ctx *ctx, size_t *chunk_size, size_t *cache_limit)
+{
+    if(ctx == NULL || chunk_size == NULL || cache_limit == NULL) return 1;
+
+    *chunk_size = ctx->max_chunk_size;
+
+    *cache_limit = ctx->chunk_cache_limit;
+
+    return 0;
+}
+
+int spng_set_crc_action(spng_ctx *ctx, int critical, int ancillary)
+{
+    if(ctx == NULL) return 1;
+    if(ctx->encode_only) return SPNG_ECTXTYPE;
+
+    if(critical > 2 || critical < 0) return 1;
+    if(ancillary > 2 || ancillary < 0) return 1;
+
+    if(critical == SPNG_CRC_DISCARD) return 1;
+
+    ctx->crc_action_critical = critical;
+    ctx->crc_action_ancillary = ancillary;
+
+    return 0;
+}
+
+int spng_set_option(spng_ctx *ctx, enum spng_option option, int value)
+{
+    if(ctx == NULL) return 1;
+    if(!ctx->state) return SPNG_EBADSTATE;
+
+    switch(option)
+    {
+        case SPNG_KEEP_UNKNOWN_CHUNKS:
+        {
+            ctx->keep_unknown = value ? 1 : 0;
+            break;
+        }
+        case SPNG_IMG_COMPRESSION_LEVEL:
+        {
+            ctx->image_options.compression_level = value;
+            break;
+        }
+        case SPNG_IMG_WINDOW_BITS:
+        {
+            ctx->image_options.window_bits = value;
+            break;
+        }
+        case SPNG_IMG_MEM_LEVEL:
+        {
+            ctx->image_options.mem_level = value;
+            break;
+        }
+        case SPNG_IMG_COMPRESSION_STRATEGY:
+        {
+            ctx->image_options.strategy = value;
+            break;
+        }
+        case SPNG_TEXT_COMPRESSION_LEVEL:
+        {
+            ctx->text_options.compression_level = value;
+            break;
+        }
+        case SPNG_TEXT_WINDOW_BITS:
+        {
+            ctx->text_options.window_bits = value;
+            break;
+        }
+        case SPNG_TEXT_MEM_LEVEL:
+        {
+            ctx->text_options.mem_level = value;
+            break;
+        }
+        case SPNG_TEXT_COMPRESSION_STRATEGY:
+        {
+            ctx->text_options.strategy = value;
+            break;
+        }
+        case SPNG_FILTER_CHOICE:
+        {
+            if(value & ~SPNG_FILTER_CHOICE_ALL) return 1;
+            ctx->encode_flags.filter_choice = value;
+            break;
+        }
+        case SPNG_CHUNK_COUNT_LIMIT:
+        {
+            if(value < 0) return 1;
+            if(value > (int)ctx->chunk_count_total) return 1;
+            ctx->chunk_count_limit = value;
+            break;
+        }
+        case SPNG_ENCODE_TO_BUFFER:
+        {
+            if(value < 0) return 1;
+            if(!ctx->encode_only) return SPNG_ECTXTYPE;
+            if(ctx->state >= SPNG_STATE_OUTPUT) return SPNG_EOPSTATE;
+
+            if(!value) break;
+
+            ctx->internal_buffer = 1;
+            ctx->state = SPNG_STATE_OUTPUT;
+
+            break;
+        }
+        default: return 1;
+    }
+
+    /* Option can no longer be overriden by the library */
+    if(option < 32) ctx->optimize_option &= ~(1 << option);
+
+    return 0;
+}
+
+int spng_get_option(spng_ctx *ctx, enum spng_option option, int *value)
+{
+    if(ctx == NULL || value == NULL) return 1;
+    if(!ctx->state) return SPNG_EBADSTATE;
+
+    switch(option)
+    {
+        case SPNG_KEEP_UNKNOWN_CHUNKS:
+        {
+            *value = ctx->keep_unknown;
+            break;
+        }
+        case SPNG_IMG_COMPRESSION_LEVEL:
+        {
+            *value = ctx->image_options.compression_level;
+            break;
+        }
+            case SPNG_IMG_WINDOW_BITS:
+        {
+            *value = ctx->image_options.window_bits;
+            break;
+        }
+        case SPNG_IMG_MEM_LEVEL:
+        {
+            *value = ctx->image_options.mem_level;
+            break;
+        }
+        case SPNG_IMG_COMPRESSION_STRATEGY:
+        {
+            *value = ctx->image_options.strategy;
+            break;
+        }
+        case SPNG_TEXT_COMPRESSION_LEVEL:
+        {
+            *value = ctx->text_options.compression_level;
+            break;
+        }
+            case SPNG_TEXT_WINDOW_BITS:
+        {
+            *value = ctx->text_options.window_bits;
+            break;
+        }
+        case SPNG_TEXT_MEM_LEVEL:
+        {
+            *value = ctx->text_options.mem_level;
+            break;
+        }
+        case SPNG_TEXT_COMPRESSION_STRATEGY:
+        {
+            *value = ctx->text_options.strategy;
+            break;
+        }
+        case SPNG_FILTER_CHOICE:
+        {
+            *value = ctx->encode_flags.filter_choice;
+            break;
+        }
+        case SPNG_CHUNK_COUNT_LIMIT:
+        {
+            *value = ctx->chunk_count_limit;
+            break;
+        }
+        case SPNG_ENCODE_TO_BUFFER:
+        {
+            if(ctx->internal_buffer) *value = 1;
+            else *value = 0;
+
+            break;
+        }
+        default: return 1;
+    }
+
+    return 0;
+}
+
+int spng_decoded_image_size(spng_ctx *ctx, int fmt, size_t *len)
+{
+    if(ctx == NULL || len == NULL) return 1;
+
+    int ret = read_chunks(ctx, 1);
+    if(ret) return ret;
+
+    ret = check_decode_fmt(&ctx->ihdr, fmt);
+    if(ret) return ret;
+
+    return calculate_image_size(&ctx->ihdr, fmt, len);
+}
+
+int spng_get_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr)
+{
+    if(ctx == NULL) return 1;
+    int ret = read_chunks(ctx, 1);
+    if(ret) return ret;
+    if(ihdr == NULL) return 1;
+
+    *ihdr = ctx->ihdr;
+
+    return 0;
+}
+
+int spng_get_plte(spng_ctx *ctx, struct spng_plte *plte)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(plte);
+
+    *plte = ctx->plte;
+
+    return 0;
+}
+
+int spng_get_trns(spng_ctx *ctx, struct spng_trns *trns)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(trns);
+
+    *trns = ctx->trns;
+
+    return 0;
+}
+
+int spng_get_chrm(spng_ctx *ctx, struct spng_chrm *chrm)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(chrm);
+
+    chrm->white_point_x = (double)ctx->chrm_int.white_point_x / 100000.0;
+    chrm->white_point_y = (double)ctx->chrm_int.white_point_y / 100000.0;
+    chrm->red_x = (double)ctx->chrm_int.red_x / 100000.0;
+    chrm->red_y = (double)ctx->chrm_int.red_y / 100000.0;
+    chrm->blue_y = (double)ctx->chrm_int.blue_y / 100000.0;
+    chrm->blue_x = (double)ctx->chrm_int.blue_x / 100000.0;
+    chrm->green_x = (double)ctx->chrm_int.green_x / 100000.0;
+    chrm->green_y = (double)ctx->chrm_int.green_y / 100000.0;
+
+    return 0;
+}
+
+int spng_get_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(chrm);
+
+    *chrm = ctx->chrm_int;
+
+    return 0;
+}
+
+int spng_get_gama(spng_ctx *ctx, double *gamma)
+{
+    double *gama = gamma;
+    SPNG_GET_CHUNK_BOILERPLATE(gama);
+
+    *gama = (double)ctx->gama / 100000.0;
+
+    return 0;
+}
+
+int spng_get_gama_int(spng_ctx *ctx, uint32_t *gama_int)
+{
+    uint32_t *gama = gama_int;
+    SPNG_GET_CHUNK_BOILERPLATE(gama);
+
+    *gama_int = ctx->gama;
+
+    return 0;
+}
+
+int spng_get_iccp(spng_ctx *ctx, struct spng_iccp *iccp)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(iccp);
+
+    *iccp = ctx->iccp;
+
+    return 0;
+}
+
+int spng_get_sbit(spng_ctx *ctx, struct spng_sbit *sbit)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(sbit);
+
+    *sbit = ctx->sbit;
+
+    return 0;
+}
+
+int spng_get_srgb(spng_ctx *ctx, uint8_t *rendering_intent)
+{
+    uint8_t *srgb = rendering_intent;
+    SPNG_GET_CHUNK_BOILERPLATE(srgb);
+
+    *srgb = ctx->srgb_rendering_intent;
+
+    return 0;
+}
+
+int spng_get_text(spng_ctx *ctx, struct spng_text *text, uint32_t *n_text)
+{
+    if(ctx == NULL) return 1;
+    int ret = read_chunks(ctx, 0);
+    if(ret) return ret;
+    if(!ctx->stored.text) return SPNG_ECHUNKAVAIL;
+    if(n_text == NULL) return 1;
+
+    if(text == NULL)
+    {
+        *n_text = ctx->n_text;
+        return 0;
+    }
+
+    if(*n_text < ctx->n_text) return 1;
+
+    uint32_t i;
+    for(i=0; i< ctx->n_text; i++)
+    {
+        text[i].type = ctx->text_list[i].type;
+        memcpy(&text[i].keyword, ctx->text_list[i].keyword, strlen(ctx->text_list[i].keyword) + 1);
+        text[i].compression_method = 0;
+        text[i].compression_flag = ctx->text_list[i].compression_flag;
+        text[i].language_tag = ctx->text_list[i].language_tag;
+        text[i].translated_keyword = ctx->text_list[i].translated_keyword;
+        text[i].length = ctx->text_list[i].text_length;
+        text[i].text = ctx->text_list[i].text;
+    }
+
+    return ret;
+}
+
+int spng_get_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(bkgd);
+
+    *bkgd = ctx->bkgd;
+
+    return 0;
+}
+
+int spng_get_hist(spng_ctx *ctx, struct spng_hist *hist)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(hist);
+
+    *hist = ctx->hist;
+
+    return 0;
+}
+
+int spng_get_phys(spng_ctx *ctx, struct spng_phys *phys)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(phys);
+
+    *phys = ctx->phys;
+
+    return 0;
+}
+
+int spng_get_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t *n_splt)
+{
+    if(ctx == NULL) return 1;
+    int ret = read_chunks(ctx, 0);
+    if(ret) return ret;
+    if(!ctx->stored.splt) return SPNG_ECHUNKAVAIL;
+    if(n_splt == NULL) return 1;
+
+    if(splt == NULL)
+    {
+        *n_splt = ctx->n_splt;
+        return 0;
+    }
+
+    if(*n_splt < ctx->n_splt) return 1;
+
+    memcpy(splt, ctx->splt_list, ctx->n_splt * sizeof(struct spng_splt));
+
+    return 0;
+}
+
+int spng_get_time(spng_ctx *ctx, struct spng_time *time)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(time);
+
+    *time = ctx->time;
+
+    return 0;
+}
+
+int spng_get_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t *n_chunks)
+{
+    if(ctx == NULL) return 1;
+    int ret = read_chunks(ctx, 0);
+    if(ret) return ret;
+    if(!ctx->stored.unknown) return SPNG_ECHUNKAVAIL;
+    if(n_chunks == NULL) return 1;
+
+    if(chunks == NULL)
+    {
+        *n_chunks = ctx->n_chunks;
+        return 0;
+    }
+
+    if(*n_chunks < ctx->n_chunks) return 1;
+
+    memcpy(chunks, ctx->chunk_list, sizeof(struct spng_unknown_chunk));
+
+    return 0;
+}
+
+int spng_get_offs(spng_ctx *ctx, struct spng_offs *offs)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(offs);
+
+    *offs = ctx->offs;
+
+    return 0;
+}
+
+int spng_get_exif(spng_ctx *ctx, struct spng_exif *exif)
+{
+    SPNG_GET_CHUNK_BOILERPLATE(exif);
+
+    *exif = ctx->exif;
+
+    return 0;
+}
+
+int spng_set_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(ihdr);
+
+    if(ctx->stored.ihdr) return 1;
+
+    ret = check_ihdr(ihdr, ctx->max_width, ctx->max_height);
+    if(ret) return ret;
+
+    ctx->ihdr = *ihdr;
+
+    ctx->stored.ihdr = 1;
+    ctx->user.ihdr = 1;
+
+    return 0;
+}
+
+int spng_set_plte(spng_ctx *ctx, struct spng_plte *plte)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(plte);
+
+    if(!ctx->stored.ihdr) return 1;
+
+    if(check_plte(plte, &ctx->ihdr)) return 1;
+
+    ctx->plte.n_entries = plte->n_entries;
+
+    memcpy(ctx->plte.entries, plte->entries, plte->n_entries * sizeof(struct spng_plte_entry));
+
+    ctx->stored.plte = 1;
+    ctx->user.plte = 1;
+
+    return 0;
+}
+
+int spng_set_trns(spng_ctx *ctx, struct spng_trns *trns)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(trns);
+
+    if(!ctx->stored.ihdr) return SPNG_ENOIHDR;
+
+    if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_GRAYSCALE)
+    {
+        ctx->trns.gray = trns->gray;
+    }
+    else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_TRUECOLOR)
+    {
+        ctx->trns.red = trns->red;
+        ctx->trns.green = trns->green;
+        ctx->trns.blue = trns->blue;
+    }
+    else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_INDEXED)
+    {
+        if(!ctx->stored.plte) return SPNG_ETRNS_NO_PLTE;
+        if(trns->n_type3_entries > ctx->plte.n_entries) return 1;
+
+        ctx->trns.n_type3_entries = trns->n_type3_entries;
+        memcpy(ctx->trns.type3_alpha, trns->type3_alpha, trns->n_type3_entries);
+    }
+    else return SPNG_ETRNS_COLOR_TYPE;
+
+    ctx->stored.trns = 1;
+    ctx->user.trns = 1;
+
+    return 0;
+}
+
+int spng_set_chrm(spng_ctx *ctx, struct spng_chrm *chrm)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(chrm);
+
+    struct spng_chrm_int chrm_int;
+
+    chrm_int.white_point_x = (uint32_t)(chrm->white_point_x * 100000.0);
+    chrm_int.white_point_y = (uint32_t)(chrm->white_point_y * 100000.0);
+    chrm_int.red_x = (uint32_t)(chrm->red_x * 100000.0);
+    chrm_int.red_y = (uint32_t)(chrm->red_y * 100000.0);
+    chrm_int.green_x = (uint32_t)(chrm->green_x * 100000.0);
+    chrm_int.green_y = (uint32_t)(chrm->green_y * 100000.0);
+    chrm_int.blue_x = (uint32_t)(chrm->blue_x * 100000.0);
+    chrm_int.blue_y = (uint32_t)(chrm->blue_y * 100000.0);
+
+    if(check_chrm_int(&chrm_int)) return SPNG_ECHRM;
+
+    ctx->chrm_int = chrm_int;
+
+    ctx->stored.chrm = 1;
+    ctx->user.chrm = 1;
+
+    return 0;
+}
+
+int spng_set_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(chrm_int);
+
+    if(check_chrm_int(chrm_int)) return SPNG_ECHRM;
+
+    ctx->chrm_int = *chrm_int;
+
+    ctx->stored.chrm = 1;
+    ctx->user.chrm = 1;
+
+    return 0;
+}
+
+int spng_set_gama(spng_ctx *ctx, double gamma)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(ctx);
+
+    uint32_t gama = gamma * 100000.0;
+
+    if(!gama) return 1;
+    if(gama > spng_u32max) return 1;
+
+    ctx->gama = gama;
+
+    ctx->stored.gama = 1;
+    ctx->user.gama = 1;
+
+    return 0;
+}
+
+int spng_set_gama_int(spng_ctx *ctx, uint32_t gamma)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(ctx);
+
+    if(!gamma) return 1;
+    if(gamma > spng_u32max) return 1;
+
+    ctx->gama = gamma;
+
+    ctx->stored.gama = 1;
+    ctx->user.gama = 1;
+
+    return 0;
+}
+
+int spng_set_iccp(spng_ctx *ctx, struct spng_iccp *iccp)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(iccp);
+
+    if(check_png_keyword(iccp->profile_name)) return SPNG_EICCP_NAME;
+    if(!iccp->profile_len) return SPNG_ECHUNK_SIZE;
+    if(iccp->profile_len > spng_u32max) return SPNG_ECHUNK_STDLEN;
+
+    if(ctx->iccp.profile && !ctx->user.iccp) spng__free(ctx, ctx->iccp.profile);
+
+    ctx->iccp = *iccp;
+
+    ctx->stored.iccp = 1;
+    ctx->user.iccp = 1;
+
+    return 0;
+}
+
+int spng_set_sbit(spng_ctx *ctx, struct spng_sbit *sbit)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(sbit);
+
+    if(check_sbit(sbit, &ctx->ihdr)) return 1;
+
+    if(!ctx->stored.ihdr) return 1;
+
+    ctx->sbit = *sbit;
+
+    ctx->stored.sbit = 1;
+    ctx->user.sbit = 1;
+
+    return 0;
+}
+
+int spng_set_srgb(spng_ctx *ctx, uint8_t rendering_intent)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(ctx);
+
+    if(rendering_intent > 3) return 1;
+
+    ctx->srgb_rendering_intent = rendering_intent;
+
+    ctx->stored.srgb = 1;
+    ctx->user.srgb = 1;
+
+    return 0;
+}
+
+int spng_set_text(spng_ctx *ctx, struct spng_text *text, uint32_t n_text)
+{
+    if(!n_text) return 1;
+    SPNG_SET_CHUNK_BOILERPLATE(text);
+
+    uint32_t i;
+    for(i=0; i < n_text; i++)
+    {
+        if(check_png_keyword(text[i].keyword)) return SPNG_ETEXT_KEYWORD;
+        if(!text[i].length) return 1;
+        if(text[i].length > UINT_MAX) return 1;
+        if(text[i].text == NULL) return 1;
+
+        if(text[i].type == SPNG_TEXT)
+        {
+            if(ctx->strict && check_png_text(text[i].text, text[i].length)) return 1;
+        }
+        else if(text[i].type == SPNG_ZTXT)
+        {
+            if(ctx->strict && check_png_text(text[i].text, text[i].length)) return 1;
+
+            if(text[i].compression_method != 0) return SPNG_EZTXT_COMPRESSION_METHOD;
+        }
+        else if(text[i].type == SPNG_ITXT)
+        {
+            if(text[i].compression_flag > 1) return SPNG_EITXT_COMPRESSION_FLAG;
+            if(text[i].compression_method != 0) return SPNG_EITXT_COMPRESSION_METHOD;
+            if(text[i].language_tag == NULL) return SPNG_EITXT_LANG_TAG;
+            if(text[i].translated_keyword == NULL) return SPNG_EITXT_TRANSLATED_KEY;
+        }
+        else return 1;
+
+    }
+
+    struct spng_text2 *text_list = spng__calloc(ctx, sizeof(struct spng_text2), n_text);
+
+    if(!text_list) return SPNG_EMEM;
+
+    if(ctx->text_list != NULL)
+    {
+        for(i=0; i < ctx->n_text; i++)
+        {
+            if(ctx->user.text) break;
+
+            spng__free(ctx, ctx->text_list[i].keyword);
+            if(ctx->text_list[i].compression_flag) spng__free(ctx, ctx->text_list[i].text);
+        }
+        spng__free(ctx, ctx->text_list);
+    }
+
+    for(i=0; i < n_text; i++)
+    {
+        text_list[i].type = text[i].type;
+        /* Prevent issues with spng_text.keyword[80] going out of scope */
+        text_list[i].keyword = text_list[i].user_keyword_storage;
+        memcpy(text_list[i].user_keyword_storage, text[i].keyword, strlen(text[i].keyword));
+        text_list[i].text = text[i].text;
+        text_list[i].text_length = text[i].length;
+
+        if(text[i].type == SPNG_ZTXT)
+        {
+            text_list[i].compression_flag = 1;
+        }
+        else if(text[i].type == SPNG_ITXT)
+        {
+            text_list[i].compression_flag = text[i].compression_flag;
+            text_list[i].language_tag = text[i].language_tag;
+            text_list[i].translated_keyword = text[i].translated_keyword;
+        }
+    }
+
+    ctx->text_list = text_list;
+    ctx->n_text = n_text;
+
+    ctx->stored.text = 1;
+    ctx->user.text = 1;
+
+    return 0;
+}
+
+int spng_set_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(bkgd);
+
+    if(!ctx->stored.ihdr)  return 1;
+
+    if(ctx->ihdr.color_type == 0 || ctx->ihdr.color_type == 4)
+    {
+        ctx->bkgd.gray = bkgd->gray;
+    }
+    else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 6)
+    {
+        ctx->bkgd.red = bkgd->red;
+        ctx->bkgd.green = bkgd->green;
+        ctx->bkgd.blue = bkgd->blue;
+    }
+    else if(ctx->ihdr.color_type == 3)
+    {
+        if(!ctx->stored.plte) return SPNG_EBKGD_NO_PLTE;
+        if(bkgd->plte_index >= ctx->plte.n_entries) return SPNG_EBKGD_PLTE_IDX;
+
+        ctx->bkgd.plte_index = bkgd->plte_index;
+    }
+
+    ctx->stored.bkgd = 1;
+    ctx->user.bkgd = 1;
+
+    return 0;
+}
+
+int spng_set_hist(spng_ctx *ctx, struct spng_hist *hist)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(hist);
+
+    if(!ctx->stored.plte) return SPNG_EHIST_NO_PLTE;
+
+    ctx->hist = *hist;
+
+    ctx->stored.hist = 1;
+    ctx->user.hist = 1;
+
+    return 0;
+}
+
+int spng_set_phys(spng_ctx *ctx, struct spng_phys *phys)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(phys);
+
+    if(check_phys(phys)) return SPNG_EPHYS;
+
+    ctx->phys = *phys;
+
+    ctx->stored.phys = 1;
+    ctx->user.phys = 1;
+
+    return 0;
+}
+
+int spng_set_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t n_splt)
+{
+    if(!n_splt) return 1;
+    SPNG_SET_CHUNK_BOILERPLATE(splt);
+
+    uint32_t i;
+    for(i=0; i < n_splt; i++)
+    {
+        if(check_png_keyword(splt[i].name)) return SPNG_ESPLT_NAME;
+        if( !(splt[i].sample_depth == 8 || splt[i].sample_depth == 16) ) return SPNG_ESPLT_DEPTH;
+    }
+
+    if(ctx->stored.splt && !ctx->user.splt)
+    {
+        for(i=0; i < ctx->n_splt; i++)
+        {
+            if(ctx->splt_list[i].entries != NULL) spng__free(ctx, ctx->splt_list[i].entries);
+        }
+        spng__free(ctx, ctx->splt_list);
+    }
+
+    ctx->splt_list = splt;
+    ctx->n_splt = n_splt;
+
+    ctx->stored.splt = 1;
+    ctx->user.splt = 1;
+
+    return 0;
+}
+
+int spng_set_time(spng_ctx *ctx, struct spng_time *time)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(time);
+
+    if(check_time(time)) return SPNG_ETIME;
+
+    ctx->time = *time;
+
+    ctx->stored.time = 1;
+    ctx->user.time = 1;
+
+    return 0;
+}
+
+int spng_set_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t n_chunks)
+{
+    if(!n_chunks) return 1;
+    SPNG_SET_CHUNK_BOILERPLATE(chunks);
+
+    uint32_t i;
+    for(i=0; i < n_chunks; i++)
+    {
+        if(chunks[i].length > spng_u32max) return SPNG_ECHUNK_STDLEN;
+        if(chunks[i].length && chunks[i].data == NULL) return 1;
+
+        switch(chunks[i].location)
+        {
+            case SPNG_AFTER_IHDR:
+            case SPNG_AFTER_PLTE:
+            case SPNG_AFTER_IDAT:
+            break;
+            default: return SPNG_ECHUNK_POS;
+        }
+    }
+
+    if(ctx->stored.unknown && !ctx->user.unknown)
+    {
+        for(i=0; i < ctx->n_chunks; i++)
+        {
+            spng__free(ctx, ctx->chunk_list[i].data);
+        }
+        spng__free(ctx, ctx->chunk_list);
+    }
+
+    ctx->chunk_list = chunks;
+    ctx->n_chunks = n_chunks;
+
+    ctx->stored.unknown = 1;
+    ctx->user.unknown = 1;
+
+    return 0;
+}
+
+int spng_set_offs(spng_ctx *ctx, struct spng_offs *offs)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(offs);
+
+    if(check_offs(offs)) return SPNG_EOFFS;
+
+    ctx->offs = *offs;
+
+    ctx->stored.offs = 1;
+    ctx->user.offs = 1;
+
+    return 0;
+}
+
+int spng_set_exif(spng_ctx *ctx, struct spng_exif *exif)
+{
+    SPNG_SET_CHUNK_BOILERPLATE(exif);
+
+    if(check_exif(exif)) return SPNG_EEXIF;
+
+    if(ctx->exif.data != NULL && !ctx->user.exif) spng__free(ctx, ctx->exif.data);
+
+    ctx->exif = *exif;
+
+    ctx->stored.exif = 1;
+    ctx->user.exif = 1;
+
+    return 0;
+}
+
+const char *spng_strerror(int err)
+{
+    switch(err)
+    {
+        case SPNG_IO_EOF: return "end of stream";
+        case SPNG_IO_ERROR: return "stream error";
+        case SPNG_OK: return "success";
+        case SPNG_EINVAL: return "invalid argument";
+        case SPNG_EMEM: return "out of memory";
+        case SPNG_EOVERFLOW: return "arithmetic overflow";
+        case SPNG_ESIGNATURE: return "invalid signature";
+        case SPNG_EWIDTH: return "invalid image width";
+        case SPNG_EHEIGHT: return "invalid image height";
+        case SPNG_EUSER_WIDTH: return "image width exceeds user limit";
+        case SPNG_EUSER_HEIGHT: return "image height exceeds user limit";
+        case SPNG_EBIT_DEPTH: return "invalid bit depth";
+        case SPNG_ECOLOR_TYPE: return "invalid color type";
+        case SPNG_ECOMPRESSION_METHOD: return "invalid compression method";
+        case SPNG_EFILTER_METHOD: return "invalid filter method";
+        case SPNG_EINTERLACE_METHOD: return "invalid interlace method";
+        case SPNG_EIHDR_SIZE: return "invalid IHDR chunk size";
+        case SPNG_ENOIHDR: return "missing IHDR chunk";
+        case SPNG_ECHUNK_POS: return "invalid chunk position";
+        case SPNG_ECHUNK_SIZE: return "invalid chunk length";
+        case SPNG_ECHUNK_CRC: return "invalid chunk checksum";
+        case SPNG_ECHUNK_TYPE: return "invalid chunk type";
+        case SPNG_ECHUNK_UNKNOWN_CRITICAL: return "unknown critical chunk";
+        case SPNG_EDUP_PLTE: return "duplicate PLTE chunk";
+        case SPNG_EDUP_CHRM: return "duplicate cHRM chunk";
+        case SPNG_EDUP_GAMA: return "duplicate gAMA chunk";
+        case SPNG_EDUP_ICCP: return "duplicate iCCP chunk";
+        case SPNG_EDUP_SBIT: return "duplicate sBIT chunk";
+        case SPNG_EDUP_SRGB: return "duplicate sRGB chunk";
+        case SPNG_EDUP_BKGD: return "duplicate bKGD chunk";
+        case SPNG_EDUP_HIST: return "duplicate hIST chunk";
+        case SPNG_EDUP_TRNS: return "duplicate tRNS chunk";
+        case SPNG_EDUP_PHYS: return "duplicate pHYs chunk";
+        case SPNG_EDUP_TIME: return "duplicate tIME chunk";
+        case SPNG_EDUP_OFFS: return "duplicate oFFs chunk";
+        case SPNG_EDUP_EXIF: return "duplicate eXIf chunk";
+        case SPNG_ECHRM: return "invalid cHRM chunk";
+        case SPNG_EPLTE_IDX: return "invalid palette (PLTE) index";
+        case SPNG_ETRNS_COLOR_TYPE: return "tRNS chunk with incompatible color type";
+        case SPNG_ETRNS_NO_PLTE: return "missing palette (PLTE) for tRNS chunk";
+        case SPNG_EGAMA: return "invalid gAMA chunk";
+        case SPNG_EICCP_NAME: return "invalid iCCP profile name";
+        case SPNG_EICCP_COMPRESSION_METHOD: return "invalid iCCP compression method";
+        case SPNG_ESBIT: return "invalid sBIT chunk";
+        case SPNG_ESRGB: return "invalid sRGB chunk";
+        case SPNG_ETEXT: return "invalid tEXt chunk";
+        case SPNG_ETEXT_KEYWORD: return "invalid tEXt keyword";
+        case SPNG_EZTXT: return "invalid zTXt chunk";
+        case SPNG_EZTXT_COMPRESSION_METHOD: return "invalid zTXt compression method";
+        case SPNG_EITXT: return "invalid iTXt chunk";
+        case SPNG_EITXT_COMPRESSION_FLAG: return "invalid iTXt compression flag";
+        case SPNG_EITXT_COMPRESSION_METHOD: return "invalid iTXt compression method";
+        case SPNG_EITXT_LANG_TAG: return "invalid iTXt language tag";
+        case SPNG_EITXT_TRANSLATED_KEY: return "invalid iTXt translated key";
+        case SPNG_EBKGD_NO_PLTE: return "missing palette for bKGD chunk";
+        case SPNG_EBKGD_PLTE_IDX: return "invalid palette index for bKGD chunk";
+        case SPNG_EHIST_NO_PLTE: return "missing palette for hIST chunk";
+        case SPNG_EPHYS: return "invalid pHYs chunk";
+        case SPNG_ESPLT_NAME: return "invalid suggested palette name";
+        case SPNG_ESPLT_DUP_NAME: return "duplicate suggested palette (sPLT) name";
+        case SPNG_ESPLT_DEPTH: return "invalid suggested palette (sPLT) sample depth";
+        case SPNG_ETIME: return "invalid tIME chunk";
+        case SPNG_EOFFS: return "invalid oFFs chunk";
+        case SPNG_EEXIF: return "invalid eXIf chunk";
+        case SPNG_EIDAT_TOO_SHORT: return "IDAT stream too short";
+        case SPNG_EIDAT_STREAM: return "IDAT stream error";
+        case SPNG_EZLIB: return "zlib error";
+        case SPNG_EFILTER: return "invalid scanline filter";
+        case SPNG_EBUFSIZ: return "invalid buffer size";
+        case SPNG_EIO: return "i/o error";
+        case SPNG_EOF: return "end of file";
+        case SPNG_EBUF_SET: return "buffer already set";
+        case SPNG_EBADSTATE: return "non-recoverable state";
+        case SPNG_EFMT: return "invalid format";
+        case SPNG_EFLAGS: return "invalid flags";
+        case SPNG_ECHUNKAVAIL: return "chunk not available";
+        case SPNG_ENCODE_ONLY: return "encode only context";
+        case SPNG_EOI: return "reached end-of-image state";
+        case SPNG_ENOPLTE: return "missing PLTE for indexed image";
+        case SPNG_ECHUNK_LIMITS: return "reached chunk/cache limits";
+        case SPNG_EZLIB_INIT: return "zlib init error";
+        case SPNG_ECHUNK_STDLEN: return "chunk exceeds maximum standard length";
+        case SPNG_EINTERNAL: return "internal error";
+        case SPNG_ECTXTYPE: return "invalid operation for context type";
+        case SPNG_ENOSRC: return "source PNG not set";
+        case SPNG_ENODST: return "PNG output not set";
+        case SPNG_EOPSTATE: return "invalid operation for state";
+        case SPNG_ENOTFINAL: return "PNG not finalized";
+        default: return "unknown error";
+    }
+}
+
+const char *spng_version_string(void)
+{
+    return SPNG_VERSION_STRING;
+}
+
+#if defined(_MSC_VER)
+    #pragma warning(pop)
+#endif
+
+/* The following SIMD optimizations are derived from libpng source code. */
+
+/*
+* PNG Reference Library License version 2
+*
+* Copyright (c) 1995-2019 The PNG Reference Library Authors.
+* Copyright (c) 2018-2019 Cosmin Truta.
+* Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
+* Copyright (c) 1996-1997 Andreas Dilger.
+* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
+*
+* The software is supplied "as is", without warranty of any kind,
+* express or implied, including, without limitation, the warranties
+* of merchantability, fitness for a particular purpose, title, and
+* non-infringement.  In no event shall the Copyright owners, or
+* anyone distributing the software, be liable for any damages or
+* other liability, whether in contract, tort or otherwise, arising
+* from, out of, or in connection with the software, or the use or
+* other dealings in the software, even if advised of the possibility
+* of such damage.
+*
+* Permission is hereby granted to use, copy, modify, and distribute
+* this software, or portions hereof, for any purpose, without fee,
+* subject to the following restrictions:
+*
+*  1. The origin of this software must not be misrepresented; you
+*     must not claim that you wrote the original software.  If you
+*     use this software in a product, an acknowledgment in the product
+*     documentation would be appreciated, but is not required.
+*
+*  2. Altered source versions must be plainly marked as such, and must
+*     not be misrepresented as being the original software.
+*
+*  3. This Copyright notice may not be removed or altered from any
+*     source or altered source distribution.
+*/
+
+#if defined(SPNG_X86)
+
+#ifndef SPNG_SSE
+    #define SPNG_SSE 1
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+    #if SPNG_SSE == 3
+        #pragma GCC target("ssse3")
+    #elif SPNG_SSE == 4
+        #pragma GCC target("sse4.1")
+    #else
+        #pragma GCC target("sse2")
+    #endif
+#endif
+
+/* SSE2 optimised filter functions
+ * Derived from filter_neon_intrinsics.c
+ *
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 2016-2017 Glenn Randers-Pehrson
+ * Written by Mike Klein and Matt Sarett
+ * Derived from arm/filter_neon_intrinsics.c
+ *
+ * This code is derived from libpng source code.
+ * For conditions of distribution and use, see the disclaimer
+ * and license above.
+ */
+
+#include <immintrin.h>
+#include <inttypes.h>
+#include <string.h>
+
+/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ * They're positioned like this:
+ *    prev:  c b
+ *    row:   a d
+ * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
+ * whichever of a, b, or c is closest to p=a+b-c.
+ */
+
+static __m128i load4(const void* p)
+{
+    int tmp;
+    memcpy(&tmp, p, sizeof(tmp));
+    return _mm_cvtsi32_si128(tmp);
+}
+
+static void store4(void* p, __m128i v)
+{
+    int tmp = _mm_cvtsi128_si32(v);
+    memcpy(p, &tmp, sizeof(int));
+}
+
+static __m128i load3(const void* p)
+{
+    uint32_t tmp = 0;
+    memcpy(&tmp, p, 3);
+    return _mm_cvtsi32_si128(tmp);
+}
+
+static void store3(void* p, __m128i v)
+{
+    int tmp = _mm_cvtsi128_si32(v);
+    memcpy(p, &tmp, 3);
+}
+
+static void defilter_sub3(size_t rowbytes, unsigned char *row)
+{
+    /* The Sub filter predicts each pixel as the previous pixel, a.
+     * There is no pixel to the left of the first pixel.  It's encoded directly.
+     * That works with our main loop if we just say that left pixel was zero.
+     */
+    size_t rb = rowbytes;
+
+    __m128i a, d = _mm_setzero_si128();
+
+    while(rb >= 4)
+    {
+        a = d; d = load4(row);
+        d = _mm_add_epi8(d, a);
+        store3(row, d);
+
+        row += 3;
+        rb  -= 3;
+    }
+
+    if(rb > 0)
+    {
+        a = d; d = load3(row);
+        d = _mm_add_epi8(d, a);
+        store3(row, d);
+    }
+}
+
+static void defilter_sub4(size_t rowbytes, unsigned char *row)
+{
+    /* The Sub filter predicts each pixel as the previous pixel, a.
+     * There is no pixel to the left of the first pixel.  It's encoded directly.
+     * That works with our main loop if we just say that left pixel was zero.
+     */
+    size_t rb = rowbytes+4;
+
+    __m128i a, d = _mm_setzero_si128();
+
+    while(rb > 4)
+    {
+        a = d; d = load4(row);
+        d = _mm_add_epi8(d, a);
+        store4(row, d);
+
+        row += 4;
+        rb  -= 4;
+    }
+}
+
+static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev)
+{
+    /* The Avg filter predicts each pixel as the (truncated) average of a and b.
+     * There's no pixel to the left of the first pixel.  Luckily, it's
+     * predicted to be half of the pixel above it.  So again, this works
+     * perfectly with our loop if we make sure a starts at zero.
+     */
+
+    size_t rb = rowbytes;
+
+    const __m128i zero = _mm_setzero_si128();
+
+    __m128i b;
+    __m128i a, d = zero;
+
+    while(rb >= 4)
+    {
+        __m128i avg;
+               b = load4(prev);
+        a = d; d = load4(row );
+
+        /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
+        avg = _mm_avg_epu8(a,b);
+        /* ...but we can fix it up by subtracting off 1 if it rounded up. */
+        avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b),
+                                            _mm_set1_epi8(1)));
+        d = _mm_add_epi8(d, avg);
+        store3(row, d);
+
+        prev += 3;
+        row  += 3;
+        rb   -= 3;
+    }
+
+    if(rb > 0)
+    {
+        __m128i avg;
+               b = load3(prev);
+        a = d; d = load3(row );
+
+        /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
+        avg = _mm_avg_epu8(a, b);
+        /* ...but we can fix it up by subtracting off 1 if it rounded up. */
+        avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b),
+                                            _mm_set1_epi8(1)));
+
+        d = _mm_add_epi8(d, avg);
+        store3(row, d);
+    }
+}
+
+static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev)
+{
+    /* The Avg filter predicts each pixel as the (truncated) average of a and b.
+     * There's no pixel to the left of the first pixel.  Luckily, it's
+     * predicted to be half of the pixel above it.  So again, this works
+     * perfectly with our loop if we make sure a starts at zero.
+     */
+    size_t rb = rowbytes+4;
+
+    const __m128i zero = _mm_setzero_si128();
+    __m128i    b;
+    __m128i a, d = zero;
+
+    while(rb > 4)
+    {
+        __m128i avg;
+               b = load4(prev);
+        a = d; d = load4(row );
+
+        /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
+        avg = _mm_avg_epu8(a,b);
+        /* ...but we can fix it up by subtracting off 1 if it rounded up. */
+        avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b),
+                                            _mm_set1_epi8(1)));
+
+        d = _mm_add_epi8(d, avg);
+        store4(row, d);
+
+        prev += 4;
+        row  += 4;
+        rb   -= 4;
+    }
+}
+
+/* Returns |x| for 16-bit lanes. */
+#if (SPNG_SSE >= 3) && !defined(_MSC_VER)
+__attribute__((target("ssse3")))
+#endif
+static __m128i abs_i16(__m128i x)
+{
+#if SPNG_SSE >= 3
+    return _mm_abs_epi16(x);
+#else
+    /* Read this all as, return x<0 ? -x : x.
+     * To negate two's complement, you flip all the bits then add 1.
+     */
+    __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
+
+    /* Flip negative lanes. */
+    x = _mm_xor_si128(x, is_negative);
+
+    /* +1 to negative lanes, else +0. */
+    x = _mm_sub_epi16(x, is_negative);
+    return x;
+#endif
+}
+
+/* Bytewise c ? t : e. */
+static __m128i if_then_else(__m128i c, __m128i t, __m128i e)
+{
+#if SPNG_SSE >= 4
+    return _mm_blendv_epi8(e, t, c);
+#else
+    return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
+#endif
+}
+
+static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev)
+{
+    /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+     * and two pixels from the previous row, b and c:
+     *   prev: c b
+     *   row:  a d
+     * The Paeth function predicts d to be whichever of a, b, or c is nearest to
+     * p=a+b-c.
+     *
+     * The first pixel has no left context, and so uses an Up filter, p = b.
+     * This works naturally with our main loop's p = a+b-c if we force a and c
+     * to zero.
+     * Here we zero b and d, which become c and a respectively at the start of
+     * the loop.
+     */
+    size_t rb = rowbytes;
+    const __m128i zero = _mm_setzero_si128();
+    __m128i c, b = zero,
+            a, d = zero;
+
+    while(rb >= 4)
+    {
+        /* It's easiest to do this math (particularly, deal with pc) with 16-bit
+         * intermediates.
+         */
+        __m128i pa,pb,pc,smallest,nearest;
+        c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
+        a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
+
+        /* (p-a) == (a+b-c - a) == (b-c) */
+
+        pa = _mm_sub_epi16(b, c);
+
+        /* (p-b) == (a+b-c - b) == (a-c) */
+        pb = _mm_sub_epi16(a, c);
+
+        /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
+        pc = _mm_add_epi16(pa, pb);
+
+        pa = abs_i16(pa);  /* |p-a| */
+        pb = abs_i16(pb);  /* |p-b| */
+        pc = abs_i16(pc);  /* |p-c| */
+
+        smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
+
+        /* Paeth breaks ties favoring a over b over c. */
+        nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
+                            if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c));
+
+        /* Note `_epi8`: we need addition to wrap modulo 255. */
+        d = _mm_add_epi8(d, nearest);
+        store3(row, _mm_packus_epi16(d, d));
+
+        prev += 3;
+        row  += 3;
+        rb   -= 3;
+    }
+
+    if(rb > 0)
+    {
+        /* It's easiest to do this math (particularly, deal with pc) with 16-bit
+         * intermediates.
+         */
+        __m128i pa, pb, pc, smallest, nearest;
+        c = b; b = _mm_unpacklo_epi8(load3(prev), zero);
+        a = d; d = _mm_unpacklo_epi8(load3(row ), zero);
+
+        /* (p-a) == (a+b-c - a) == (b-c) */
+        pa = _mm_sub_epi16(b, c);
+
+        /* (p-b) == (a+b-c - b) == (a-c) */
+        pb = _mm_sub_epi16(a, c);
+
+        /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
+        pc = _mm_add_epi16(pa, pb);
+
+        pa = abs_i16(pa);  /* |p-a| */
+        pb = abs_i16(pb);  /* |p-b| */
+        pc = abs_i16(pc);  /* |p-c| */
+
+        smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
+
+        /* Paeth breaks ties favoring a over b over c. */
+        nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
+                            if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c));
+
+        /* Note `_epi8`: we need addition to wrap modulo 255. */
+        d = _mm_add_epi8(d, nearest);
+        store3(row, _mm_packus_epi16(d, d));
+    }
+}
+
+static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev)
+{
+    /* Paeth tries to predict pixel d using the pixel to the left of it, a,
+     * and two pixels from the previous row, b and c:
+     *   prev: c b
+     *   row:  a d
+     * The Paeth function predicts d to be whichever of a, b, or c is nearest to
+     * p=a+b-c.
+     *
+     * The first pixel has no left context, and so uses an Up filter, p = b.
+     * This works naturally with our main loop's p = a+b-c if we force a and c
+     * to zero.
+     * Here we zero b and d, which become c and a respectively at the start of
+     * the loop.
+     */
+    size_t rb = rowbytes+4;
+
+    const __m128i zero = _mm_setzero_si128();
+    __m128i pa, pb, pc, smallest, nearest;
+    __m128i c, b = zero,
+            a, d = zero;
+
+    while(rb > 4)
+    {
+        /* It's easiest to do this math (particularly, deal with pc) with 16-bit
+         * intermediates.
+         */
+        c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
+        a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
+
+        /* (p-a) == (a+b-c - a) == (b-c) */
+        pa = _mm_sub_epi16(b, c);
+
+        /* (p-b) == (a+b-c - b) == (a-c) */
+        pb = _mm_sub_epi16(a, c);
+
+        /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
+        pc = _mm_add_epi16(pa, pb);
+
+        pa = abs_i16(pa);  /* |p-a| */
+        pb = abs_i16(pb);  /* |p-b| */
+        pc = abs_i16(pc);  /* |p-c| */
+
+        smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
+
+        /* Paeth breaks ties favoring a over b over c. */
+        nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
+                            if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c));
+
+        /* Note `_epi8`: we need addition to wrap modulo 255. */
+        d = _mm_add_epi8(d, nearest);
+        store4(row, _mm_packus_epi16(d, d));
+
+        prev += 4;
+        row  += 4;
+        rb   -= 4;
+    }
+}
+
+#endif /* SPNG_X86 */
+
+
+#if defined(SPNG_ARM)
+
+/* NEON optimised filter functions
+ * Derived from filter_neon_intrinsics.c
+ *
+ * Copyright (c) 2018 Cosmin Truta
+ * Copyright (c) 2014,2016 Glenn Randers-Pehrson
+ * Written by James Yu <james.yu at linaro.org>, October 2013.
+ * Based on filter_neon.S, written by Mans Rullgard, 2011.
+ *
+ * This code is derived from libpng source code.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in this file.
+ */
+
+#define png_aligncast(type, value) ((void*)(value))
+#define png_aligncastconst(type, value) ((const void*)(value))
+
+/* libpng row pointers are not necessarily aligned to any particular boundary,
+ * however this code will only work with appropriate alignment. mips/mips_init.c
+ * checks for this (and will not compile unless it is done). This code uses
+ * variants of png_aligncast to avoid compiler warnings.
+ */
+#define png_ptr(type,pointer) png_aligncast(type *,pointer)
+#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
+
+/* The following relies on a variable 'temp_pointer' being declared with type
+ * 'type'.  This is written this way just to hide the GCC strict aliasing
+ * warning; note that the code is safe because there never is an alias between
+ * the input and output pointers.
+ */
+#define png_ldr(type,pointer)\
+   (temp_pointer = png_ptr(type,pointer), *temp_pointer)
+
+
+#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
+    #include <arm64_neon.h>
+#else
+    #include <arm_neon.h>
+#endif
+
+static void defilter_sub3(size_t rowbytes, unsigned char *row)
+{
+    unsigned char *rp = row;
+    unsigned char *rp_stop = row + rowbytes;
+
+    uint8x16_t vtmp = vld1q_u8(rp);
+    uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
+    uint8x8x2_t vrp = *vrpt;
+
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    for (; rp < rp_stop;)
+    {
+        uint8x8_t vtmp1, vtmp2;
+        uint32x2_t *temp_pointer;
+
+        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+        vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+        vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6);
+        vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
+
+        vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+        vdest.val[2] = vadd_u8(vdest.val[1], vtmp2);
+        vdest.val[3] = vadd_u8(vdest.val[2], vtmp1);
+
+        vtmp = vld1q_u8(rp + 12);
+        vrpt = png_ptr(uint8x8x2_t, &vtmp);
+        vrp = *vrpt;
+
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+        rp += 3;
+    }
+}
+
+static void defilter_sub4(size_t rowbytes, unsigned char *row)
+{
+    unsigned char *rp = row;
+    unsigned char *rp_stop = row + rowbytes;
+
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    for (; rp < rp_stop; rp += 16)
+    {
+        uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
+        uint8x8x4_t *vrpt = png_ptr(uint8x8x4_t,&vtmp);
+        uint8x8x4_t vrp = *vrpt;
+        uint32x2x4_t *temp_pointer;
+        uint32x2x4_t vdest_val;
+
+        vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+        vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]);
+        vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]);
+        vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]);
+
+        vdest_val = png_ldr(uint32x2x4_t, &vdest);
+        vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
+    }
+}
+
+static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
+{
+    unsigned char *rp = row;
+    const unsigned char *pp = prev_row;
+    unsigned char *rp_stop = row + rowbytes;
+
+    uint8x16_t vtmp;
+    uint8x8x2_t *vrpt;
+    uint8x8x2_t vrp;
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    vtmp = vld1q_u8(rp);
+    vrpt = png_ptr(uint8x8x2_t,&vtmp);
+    vrp = *vrpt;
+
+    for (; rp < rp_stop; pp += 12)
+    {
+        uint8x8_t vtmp1, vtmp2, vtmp3;
+
+        uint8x8x2_t *vppt;
+        uint8x8x2_t vpp;
+
+        uint32x2_t *temp_pointer;
+
+        vtmp = vld1q_u8(pp);
+        vppt = png_ptr(uint8x8x2_t,&vtmp);
+        vpp = *vppt;
+
+        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+        vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
+        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+
+        vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
+        vtmp3 = vext_u8(vrp.val[0], vrp.val[1], 6);
+        vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
+        vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
+
+        vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 6);
+        vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+
+        vtmp = vld1q_u8(rp + 12);
+        vrpt = png_ptr(uint8x8x2_t,&vtmp);
+        vrp = *vrpt;
+
+        vdest.val[2] = vhadd_u8(vdest.val[1], vtmp2);
+        vdest.val[2] = vadd_u8(vdest.val[2], vtmp3);
+
+        vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
+
+        vdest.val[3] = vhadd_u8(vdest.val[2], vtmp2);
+        vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
+
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+        rp += 3;
+    }
+}
+
+static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
+{
+    unsigned char *rp = row;
+    unsigned char *rp_stop = row + rowbytes;
+    const unsigned char *pp = prev_row;
+
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    for (; rp < rp_stop; rp += 16, pp += 16)
+    {
+        uint32x2x4_t vtmp;
+        uint8x8x4_t *vrpt, *vppt;
+        uint8x8x4_t vrp, vpp;
+        uint32x2x4_t *temp_pointer;
+        uint32x2x4_t vdest_val;
+
+        vtmp = vld4_u32(png_ptr(uint32_t,rp));
+        vrpt = png_ptr(uint8x8x4_t,&vtmp);
+        vrp = *vrpt;
+        vtmp = vld4_u32(png_ptrc(uint32_t,pp));
+        vppt = png_ptr(uint8x8x4_t,&vtmp);
+        vpp = *vppt;
+
+        vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
+        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+        vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]);
+        vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
+        vdest.val[2] = vhadd_u8(vdest.val[1], vpp.val[2]);
+        vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
+        vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]);
+        vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
+
+        vdest_val = png_ldr(uint32x2x4_t, &vdest);
+        vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
+    }
+}
+
+static uint8x8_t paeth_arm(uint8x8_t a, uint8x8_t b, uint8x8_t c)
+{
+    uint8x8_t d, e;
+    uint16x8_t p1, pa, pb, pc;
+
+    p1 = vaddl_u8(a, b); /* a + b */
+    pc = vaddl_u8(c, c); /* c * 2 */
+    pa = vabdl_u8(b, c); /* pa */
+    pb = vabdl_u8(a, c); /* pb */
+    pc = vabdq_u16(p1, pc); /* pc */
+
+    p1 = vcleq_u16(pa, pb); /* pa <= pb */
+    pa = vcleq_u16(pa, pc); /* pa <= pc */
+    pb = vcleq_u16(pb, pc); /* pb <= pc */
+
+    p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */
+
+    d = vmovn_u16(pb);
+    e = vmovn_u16(p1);
+
+    d = vbsl_u8(d, b, c);
+    e = vbsl_u8(e, a, d);
+
+    return e;
+}
+
+static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
+{
+    unsigned char *rp = row;
+    const unsigned char *pp = prev_row;
+    unsigned char *rp_stop = row + rowbytes;
+
+    uint8x16_t vtmp;
+    uint8x8x2_t *vrpt;
+    uint8x8x2_t vrp;
+    uint8x8_t vlast = vdup_n_u8(0);
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    vtmp = vld1q_u8(rp);
+    vrpt = png_ptr(uint8x8x2_t,&vtmp);
+    vrp = *vrpt;
+
+    for (; rp < rp_stop; pp += 12)
+    {
+        uint8x8x2_t *vppt;
+        uint8x8x2_t vpp;
+        uint8x8_t vtmp1, vtmp2, vtmp3;
+        uint32x2_t *temp_pointer;
+
+        vtmp = vld1q_u8(pp);
+        vppt = png_ptr(uint8x8x2_t,&vtmp);
+        vpp = *vppt;
+
+        vdest.val[0] = paeth_arm(vdest.val[3], vpp.val[0], vlast);
+        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+
+        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+        vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
+        vdest.val[1] = paeth_arm(vdest.val[0], vtmp2, vpp.val[0]);
+        vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
+
+        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 6);
+        vtmp3 = vext_u8(vpp.val[0], vpp.val[1], 6);
+        vdest.val[2] = paeth_arm(vdest.val[1], vtmp3, vtmp2);
+        vdest.val[2] = vadd_u8(vdest.val[2], vtmp1);
+
+        vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+        vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
+
+        vtmp = vld1q_u8(rp + 12);
+        vrpt = png_ptr(uint8x8x2_t,&vtmp);
+        vrp = *vrpt;
+
+        vdest.val[3] = paeth_arm(vdest.val[2], vtmp2, vtmp3);
+        vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
+
+        vlast = vtmp2;
+
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+        rp += 3;
+        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+        rp += 3;
+    }
+}
+
+static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
+{
+    unsigned char *rp = row;
+    unsigned char *rp_stop = row + rowbytes;
+    const unsigned char *pp = prev_row;
+
+    uint8x8_t vlast = vdup_n_u8(0);
+    uint8x8x4_t vdest;
+    vdest.val[3] = vdup_n_u8(0);
+
+    for (; rp < rp_stop; rp += 16, pp += 16)
+    {
+        uint32x2x4_t vtmp;
+        uint8x8x4_t *vrpt, *vppt;
+        uint8x8x4_t vrp, vpp;
+        uint32x2x4_t *temp_pointer;
+        uint32x2x4_t vdest_val;
+
+        vtmp = vld4_u32(png_ptr(uint32_t,rp));
+        vrpt = png_ptr(uint8x8x4_t,&vtmp);
+        vrp = *vrpt;
+        vtmp = vld4_u32(png_ptrc(uint32_t,pp));
+        vppt = png_ptr(uint8x8x4_t,&vtmp);
+        vpp = *vppt;
+
+        vdest.val[0] = paeth_arm(vdest.val[3], vpp.val[0], vlast);
+        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+        vdest.val[1] = paeth_arm(vdest.val[0], vpp.val[1], vpp.val[0]);
+        vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
+        vdest.val[2] = paeth_arm(vdest.val[1], vpp.val[2], vpp.val[1]);
+        vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
+        vdest.val[3] = paeth_arm(vdest.val[2], vpp.val[3], vpp.val[2]);
+        vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
+
+        vlast = vpp.val[3];
+
+        vdest_val = png_ldr(uint32x2x4_t, &vdest);
+        vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
+    }
+}
+
+/* NEON optimised palette expansion functions
+ * Derived from palette_neon_intrinsics.c
+ *
+ * Copyright (c) 2018-2019 Cosmin Truta
+ * Copyright (c) 2017-2018 Arm Holdings. All rights reserved.
+ * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
+ *
+ * This code is derived from libpng source code.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in this file.
+ *
+ * Related: https://developer.arm.com/documentation/101964/latest/Color-palette-expansion
+ *
+ * The functions were refactored to iterate forward.
+ *
+ */
+
+/* Expands a palettized row into RGBA8. */
+static uint32_t expand_palette_rgba8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width)
+{
+    const uint32_t scanline_stride = 4;
+    const uint32_t row_stride = scanline_stride * 4;
+    const uint32_t count = width / scanline_stride;
+    const uint32_t *palette = (const uint32_t*)plte;
+
+    if(!count) return 0;
+
+    uint32_t i;
+    uint32x4_t cur;
+    for(i=0; i < count; i++, scanline += scanline_stride)
+    {
+        cur = vld1q_dup_u32 (palette + scanline[0]);
+        cur = vld1q_lane_u32(palette + scanline[1], cur, 1);
+        cur = vld1q_lane_u32(palette + scanline[2], cur, 2);
+        cur = vld1q_lane_u32(palette + scanline[3], cur, 3);
+        vst1q_u32((uint32_t*)(row + i * row_stride), cur);
+    }
+
+    return count * scanline_stride;
+}
+
+/* Expands a palettized row into RGB8. */
+static uint32_t expand_palette_rgb8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width)
+{
+    const uint32_t scanline_stride = 8;
+    const uint32_t row_stride = scanline_stride * 3;
+    const uint32_t count = width / scanline_stride;
+
+    if(!count) return 0;
+
+    uint32_t i;
+    uint8x8x3_t cur;
+    for(i=0; i < count; i++, scanline += scanline_stride)
+    {
+        cur = vld3_dup_u8 (plte + 3 * scanline[0]);
+        cur = vld3_lane_u8(plte + 3 * scanline[1], cur, 1);
+        cur = vld3_lane_u8(plte + 3 * scanline[2], cur, 2);
+        cur = vld3_lane_u8(plte + 3 * scanline[3], cur, 3);
+        cur = vld3_lane_u8(plte + 3 * scanline[4], cur, 4);
+        cur = vld3_lane_u8(plte + 3 * scanline[5], cur, 5);
+        cur = vld3_lane_u8(plte + 3 * scanline[6], cur, 6);
+        cur = vld3_lane_u8(plte + 3 * scanline[7], cur, 7);
+        vst3_u8(row + i * row_stride, cur);
+    }
+
+    return count * scanline_stride;
+}
+
+#endif /* SPNG_ARM */
diff --git a/cmake/externals/libspng/spng.h b/cmake/externals/libspng/spng.h
new file mode 100644
index 000000000..8f946337b
--- /dev/null
+++ b/cmake/externals/libspng/spng.h
@@ -0,0 +1,537 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+#ifndef SPNG_H
+#define SPNG_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(SPNG_STATIC)
+    #if defined(SPNG__BUILD)
+        #define SPNG_API __declspec(dllexport)
+    #else
+        #define SPNG_API __declspec(dllimport)
+    #endif
+#else
+    #define SPNG_API
+#endif
+
+#if defined(_MSC_VER)
+    #define SPNG_CDECL __cdecl
+#else
+    #define SPNG_CDECL
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define SPNG_VERSION_MAJOR 0
+#define SPNG_VERSION_MINOR 7
+#define SPNG_VERSION_PATCH 4
+
+enum spng_errno
+{
+    SPNG_IO_ERROR = -2,
+    SPNG_IO_EOF = -1,
+    SPNG_OK = 0,
+    SPNG_EINVAL,
+    SPNG_EMEM,
+    SPNG_EOVERFLOW,
+    SPNG_ESIGNATURE,
+    SPNG_EWIDTH,
+    SPNG_EHEIGHT,
+    SPNG_EUSER_WIDTH,
+    SPNG_EUSER_HEIGHT,
+    SPNG_EBIT_DEPTH,
+    SPNG_ECOLOR_TYPE,
+    SPNG_ECOMPRESSION_METHOD,
+    SPNG_EFILTER_METHOD,
+    SPNG_EINTERLACE_METHOD,
+    SPNG_EIHDR_SIZE,
+    SPNG_ENOIHDR,
+    SPNG_ECHUNK_POS,
+    SPNG_ECHUNK_SIZE,
+    SPNG_ECHUNK_CRC,
+    SPNG_ECHUNK_TYPE,
+    SPNG_ECHUNK_UNKNOWN_CRITICAL,
+    SPNG_EDUP_PLTE,
+    SPNG_EDUP_CHRM,
+    SPNG_EDUP_GAMA,
+    SPNG_EDUP_ICCP,
+    SPNG_EDUP_SBIT,
+    SPNG_EDUP_SRGB,
+    SPNG_EDUP_BKGD,
+    SPNG_EDUP_HIST,
+    SPNG_EDUP_TRNS,
+    SPNG_EDUP_PHYS,
+    SPNG_EDUP_TIME,
+    SPNG_EDUP_OFFS,
+    SPNG_EDUP_EXIF,
+    SPNG_ECHRM,
+    SPNG_EPLTE_IDX,
+    SPNG_ETRNS_COLOR_TYPE,
+    SPNG_ETRNS_NO_PLTE,
+    SPNG_EGAMA,
+    SPNG_EICCP_NAME,
+    SPNG_EICCP_COMPRESSION_METHOD,
+    SPNG_ESBIT,
+    SPNG_ESRGB,
+    SPNG_ETEXT,
+    SPNG_ETEXT_KEYWORD,
+    SPNG_EZTXT,
+    SPNG_EZTXT_COMPRESSION_METHOD,
+    SPNG_EITXT,
+    SPNG_EITXT_COMPRESSION_FLAG,
+    SPNG_EITXT_COMPRESSION_METHOD,
+    SPNG_EITXT_LANG_TAG,
+    SPNG_EITXT_TRANSLATED_KEY,
+    SPNG_EBKGD_NO_PLTE,
+    SPNG_EBKGD_PLTE_IDX,
+    SPNG_EHIST_NO_PLTE,
+    SPNG_EPHYS,
+    SPNG_ESPLT_NAME,
+    SPNG_ESPLT_DUP_NAME,
+    SPNG_ESPLT_DEPTH,
+    SPNG_ETIME,
+    SPNG_EOFFS,
+    SPNG_EEXIF,
+    SPNG_EIDAT_TOO_SHORT,
+    SPNG_EIDAT_STREAM,
+    SPNG_EZLIB,
+    SPNG_EFILTER,
+    SPNG_EBUFSIZ,
+    SPNG_EIO,
+    SPNG_EOF,
+    SPNG_EBUF_SET,
+    SPNG_EBADSTATE,
+    SPNG_EFMT,
+    SPNG_EFLAGS,
+    SPNG_ECHUNKAVAIL,
+    SPNG_ENCODE_ONLY,
+    SPNG_EOI,
+    SPNG_ENOPLTE,
+    SPNG_ECHUNK_LIMITS,
+    SPNG_EZLIB_INIT,
+    SPNG_ECHUNK_STDLEN,
+    SPNG_EINTERNAL,
+    SPNG_ECTXTYPE,
+    SPNG_ENOSRC,
+    SPNG_ENODST,
+    SPNG_EOPSTATE,
+    SPNG_ENOTFINAL,
+};
+
+enum spng_text_type
+{
+    SPNG_TEXT = 1,
+    SPNG_ZTXT = 2,
+    SPNG_ITXT = 3
+};
+
+enum spng_color_type
+{
+    SPNG_COLOR_TYPE_GRAYSCALE = 0,
+    SPNG_COLOR_TYPE_TRUECOLOR = 2,
+    SPNG_COLOR_TYPE_INDEXED = 3,
+    SPNG_COLOR_TYPE_GRAYSCALE_ALPHA = 4,
+    SPNG_COLOR_TYPE_TRUECOLOR_ALPHA = 6
+};
+
+enum spng_filter
+{
+    SPNG_FILTER_NONE = 0,
+    SPNG_FILTER_SUB = 1,
+    SPNG_FILTER_UP = 2,
+    SPNG_FILTER_AVERAGE = 3,
+    SPNG_FILTER_PAETH = 4
+};
+
+enum spng_filter_choice
+{
+    SPNG_DISABLE_FILTERING = 0,
+    SPNG_FILTER_CHOICE_NONE = 8,
+    SPNG_FILTER_CHOICE_SUB = 16,
+    SPNG_FILTER_CHOICE_UP = 32,
+    SPNG_FILTER_CHOICE_AVG = 64,
+    SPNG_FILTER_CHOICE_PAETH = 128,
+    SPNG_FILTER_CHOICE_ALL = (8|16|32|64|128)
+};
+
+enum spng_interlace_method
+{
+    SPNG_INTERLACE_NONE = 0,
+    SPNG_INTERLACE_ADAM7 = 1
+};
+
+/* Channels are always in byte-order */
+enum spng_format
+{
+    SPNG_FMT_RGBA8 = 1,
+    SPNG_FMT_RGBA16 = 2,
+    SPNG_FMT_RGB8 = 4,
+
+    /* Partially implemented, see documentation */
+    SPNG_FMT_GA8 = 16,
+    SPNG_FMT_GA16 = 32,
+    SPNG_FMT_G8 = 64,
+
+    /* No conversion or scaling */
+    SPNG_FMT_PNG = 256,
+    SPNG_FMT_RAW = 512  /* big-endian (everything else is host-endian) */
+};
+
+enum spng_ctx_flags
+{
+    SPNG_CTX_IGNORE_ADLER32 = 1, /* Ignore checksum in DEFLATE streams */
+    SPNG_CTX_ENCODER = 2 /* Create an encoder context */
+};
+
+enum spng_decode_flags
+{
+    SPNG_DECODE_USE_TRNS = 1, /* Deprecated */
+    SPNG_DECODE_USE_GAMA = 2, /* Deprecated */
+    SPNG_DECODE_USE_SBIT = 8, /* Undocumented */
+
+    SPNG_DECODE_TRNS = 1, /* Apply transparency */
+    SPNG_DECODE_GAMMA = 2, /* Apply gamma correction */
+    SPNG_DECODE_PROGRESSIVE = 256 /* Initialize for progressive reads */
+};
+
+enum spng_crc_action
+{
+    /* Default for critical chunks */
+    SPNG_CRC_ERROR = 0,
+
+    /* Discard chunk, invalid for critical chunks.
+       Since v0.6.2: default for ancillary chunks */
+    SPNG_CRC_DISCARD = 1,
+
+    /* Ignore and don't calculate checksum.
+       Since v0.6.2: also ignores checksums in DEFLATE streams */
+    SPNG_CRC_USE = 2
+};
+
+enum spng_encode_flags
+{
+    SPNG_ENCODE_PROGRESSIVE = 1, /* Initialize for progressive writes */
+    SPNG_ENCODE_FINALIZE = 2, /* Finalize PNG after encoding image */
+};
+
+struct spng_ihdr
+{
+    uint32_t width;
+    uint32_t height;
+    uint8_t bit_depth;
+    uint8_t color_type;
+    uint8_t compression_method;
+    uint8_t filter_method;
+    uint8_t interlace_method;
+};
+
+struct spng_plte_entry
+{
+    uint8_t red;
+    uint8_t green;
+    uint8_t blue;
+
+    uint8_t alpha; /* Reserved for internal use */
+};
+
+struct spng_plte
+{
+    uint32_t n_entries;
+    struct spng_plte_entry entries[256];
+};
+
+struct spng_trns
+{
+    uint16_t gray;
+
+    uint16_t red;
+    uint16_t green;
+    uint16_t blue;
+
+    uint32_t n_type3_entries;
+    uint8_t type3_alpha[256];
+};
+
+struct spng_chrm_int
+{
+    uint32_t white_point_x;
+    uint32_t white_point_y;
+    uint32_t red_x;
+    uint32_t red_y;
+    uint32_t green_x;
+    uint32_t green_y;
+    uint32_t blue_x;
+    uint32_t blue_y;
+};
+
+struct spng_chrm
+{
+    double white_point_x;
+    double white_point_y;
+    double red_x;
+    double red_y;
+    double green_x;
+    double green_y;
+    double blue_x;
+    double blue_y;
+};
+
+struct spng_iccp
+{
+    char profile_name[80];
+    size_t profile_len;
+    char *profile;
+};
+
+struct spng_sbit
+{
+    uint8_t grayscale_bits;
+    uint8_t red_bits;
+    uint8_t green_bits;
+    uint8_t blue_bits;
+    uint8_t alpha_bits;
+};
+
+struct spng_text
+{
+    char keyword[80];
+    int type;
+
+    size_t length;
+    char *text;
+
+    uint8_t compression_flag; /* iTXt only */
+    uint8_t compression_method; /* iTXt, ztXt only */
+    char *language_tag; /* iTXt only */
+    char *translated_keyword; /* iTXt only */
+};
+
+struct spng_bkgd
+{
+    uint16_t gray; /* Only for gray/gray alpha */
+    uint16_t red;
+    uint16_t green;
+    uint16_t blue;
+    uint16_t plte_index; /* Only for indexed color */
+};
+
+struct spng_hist
+{
+    uint16_t frequency[256];
+};
+
+struct spng_phys
+{
+    uint32_t ppu_x, ppu_y;
+    uint8_t unit_specifier;
+};
+
+struct spng_splt_entry
+{
+    uint16_t red;
+    uint16_t green;
+    uint16_t blue;
+    uint16_t alpha;
+    uint16_t frequency;
+};
+
+struct spng_splt
+{
+    char name[80];
+    uint8_t sample_depth;
+    uint32_t n_entries;
+    struct spng_splt_entry *entries;
+};
+
+struct spng_time
+{
+    uint16_t year;
+    uint8_t month;
+    uint8_t day;
+    uint8_t hour;
+    uint8_t minute;
+    uint8_t second;
+};
+
+struct spng_offs
+{
+    int32_t x, y;
+    uint8_t unit_specifier;
+};
+
+struct spng_exif
+{
+    size_t length;
+    char *data;
+};
+
+struct spng_chunk
+{
+    size_t offset;
+    uint32_t length;
+    uint8_t type[4];
+    uint32_t crc;
+};
+
+enum spng_location
+{
+    SPNG_AFTER_IHDR = 1,
+    SPNG_AFTER_PLTE = 2,
+    SPNG_AFTER_IDAT = 8,
+};
+
+struct spng_unknown_chunk
+{
+    uint8_t type[4];
+    size_t length;
+    void *data;
+    enum spng_location location;
+};
+
+enum spng_option
+{
+    SPNG_KEEP_UNKNOWN_CHUNKS = 1,
+
+    SPNG_IMG_COMPRESSION_LEVEL,
+    SPNG_IMG_WINDOW_BITS,
+    SPNG_IMG_MEM_LEVEL,
+    SPNG_IMG_COMPRESSION_STRATEGY,
+
+    SPNG_TEXT_COMPRESSION_LEVEL,
+    SPNG_TEXT_WINDOW_BITS,
+    SPNG_TEXT_MEM_LEVEL,
+    SPNG_TEXT_COMPRESSION_STRATEGY,
+
+    SPNG_FILTER_CHOICE,
+    SPNG_CHUNK_COUNT_LIMIT,
+    SPNG_ENCODE_TO_BUFFER,
+};
+
+typedef void* SPNG_CDECL spng_malloc_fn(size_t size);
+typedef void* SPNG_CDECL spng_realloc_fn(void* ptr, size_t size);
+typedef void* SPNG_CDECL spng_calloc_fn(size_t count, size_t size);
+typedef void SPNG_CDECL spng_free_fn(void* ptr);
+
+struct spng_alloc
+{
+    spng_malloc_fn *malloc_fn;
+    spng_realloc_fn *realloc_fn;
+    spng_calloc_fn *calloc_fn;
+    spng_free_fn *free_fn;
+};
+
+struct spng_row_info
+{
+    uint32_t scanline_idx;
+    uint32_t row_num; /* deinterlaced row index */
+    int pass;
+    uint8_t filter;
+};
+
+typedef struct spng_ctx spng_ctx;
+
+typedef int spng_read_fn(spng_ctx *ctx, void *user, void *dest, size_t length);
+typedef int spng_write_fn(spng_ctx *ctx, void *user, void *src, size_t length);
+
+typedef int spng_rw_fn(spng_ctx *ctx, void *user, void *dst_src, size_t length);
+
+SPNG_API spng_ctx *spng_ctx_new(int flags);
+SPNG_API spng_ctx *spng_ctx_new2(struct spng_alloc *alloc, int flags);
+SPNG_API void spng_ctx_free(spng_ctx *ctx);
+
+SPNG_API int spng_set_png_buffer(spng_ctx *ctx, const void *buf, size_t size);
+SPNG_API int spng_set_png_stream(spng_ctx *ctx, spng_rw_fn *rw_func, void *user);
+SPNG_API int spng_set_png_file(spng_ctx *ctx, FILE *file);
+
+SPNG_API void *spng_get_png_buffer(spng_ctx *ctx, size_t *len, int *error);
+
+SPNG_API int spng_set_image_limits(spng_ctx *ctx, uint32_t width, uint32_t height);
+SPNG_API int spng_get_image_limits(spng_ctx *ctx, uint32_t *width, uint32_t *height);
+
+SPNG_API int spng_set_chunk_limits(spng_ctx *ctx, size_t chunk_size, size_t cache_size);
+SPNG_API int spng_get_chunk_limits(spng_ctx *ctx, size_t *chunk_size, size_t *cache_size);
+
+SPNG_API int spng_set_crc_action(spng_ctx *ctx, int critical, int ancillary);
+
+SPNG_API int spng_set_option(spng_ctx *ctx, enum spng_option option, int value);
+SPNG_API int spng_get_option(spng_ctx *ctx, enum spng_option option, int *value);
+
+SPNG_API int spng_decoded_image_size(spng_ctx *ctx, int fmt, size_t *len);
+
+/* Decode */
+SPNG_API int spng_decode_image(spng_ctx *ctx, void *out, size_t len, int fmt, int flags);
+
+/* Progressive decode */
+SPNG_API int spng_decode_scanline(spng_ctx *ctx, void *out, size_t len);
+SPNG_API int spng_decode_row(spng_ctx *ctx, void *out, size_t len);
+SPNG_API int spng_decode_chunks(spng_ctx *ctx);
+
+/* Encode/decode */
+SPNG_API int spng_get_row_info(spng_ctx *ctx, struct spng_row_info *row_info);
+
+/* Encode */
+SPNG_API int spng_encode_image(spng_ctx *ctx, const void *img, size_t len, int fmt, int flags);
+
+/* Progressive encode */
+SPNG_API int spng_encode_scanline(spng_ctx *ctx, const void *scanline, size_t len);
+SPNG_API int spng_encode_row(spng_ctx *ctx, const void *row, size_t len);
+SPNG_API int spng_encode_chunks(spng_ctx *ctx);
+
+SPNG_API int spng_get_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr);
+SPNG_API int spng_get_plte(spng_ctx *ctx, struct spng_plte *plte);
+SPNG_API int spng_get_trns(spng_ctx *ctx, struct spng_trns *trns);
+SPNG_API int spng_get_chrm(spng_ctx *ctx, struct spng_chrm *chrm);
+SPNG_API int spng_get_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int);
+SPNG_API int spng_get_gama(spng_ctx *ctx, double *gamma);
+SPNG_API int spng_get_gama_int(spng_ctx *ctx, uint32_t *gama_int);
+SPNG_API int spng_get_iccp(spng_ctx *ctx, struct spng_iccp *iccp);
+SPNG_API int spng_get_sbit(spng_ctx *ctx, struct spng_sbit *sbit);
+SPNG_API int spng_get_srgb(spng_ctx *ctx, uint8_t *rendering_intent);
+SPNG_API int spng_get_text(spng_ctx *ctx, struct spng_text *text, uint32_t *n_text);
+SPNG_API int spng_get_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd);
+SPNG_API int spng_get_hist(spng_ctx *ctx, struct spng_hist *hist);
+SPNG_API int spng_get_phys(spng_ctx *ctx, struct spng_phys *phys);
+SPNG_API int spng_get_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t *n_splt);
+SPNG_API int spng_get_time(spng_ctx *ctx, struct spng_time *time);
+SPNG_API int spng_get_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t *n_chunks);
+
+/* Official extensions */
+SPNG_API int spng_get_offs(spng_ctx *ctx, struct spng_offs *offs);
+SPNG_API int spng_get_exif(spng_ctx *ctx, struct spng_exif *exif);
+
+
+SPNG_API int spng_set_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr);
+SPNG_API int spng_set_plte(spng_ctx *ctx, struct spng_plte *plte);
+SPNG_API int spng_set_trns(spng_ctx *ctx, struct spng_trns *trns);
+SPNG_API int spng_set_chrm(spng_ctx *ctx, struct spng_chrm *chrm);
+SPNG_API int spng_set_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int);
+SPNG_API int spng_set_gama(spng_ctx *ctx, double gamma);
+SPNG_API int spng_set_gama_int(spng_ctx *ctx, uint32_t gamma);
+SPNG_API int spng_set_iccp(spng_ctx *ctx, struct spng_iccp *iccp);
+SPNG_API int spng_set_sbit(spng_ctx *ctx, struct spng_sbit *sbit);
+SPNG_API int spng_set_srgb(spng_ctx *ctx, uint8_t rendering_intent);
+SPNG_API int spng_set_text(spng_ctx *ctx, struct spng_text *text, uint32_t n_text);
+SPNG_API int spng_set_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd);
+SPNG_API int spng_set_hist(spng_ctx *ctx, struct spng_hist *hist);
+SPNG_API int spng_set_phys(spng_ctx *ctx, struct spng_phys *phys);
+SPNG_API int spng_set_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t n_splt);
+SPNG_API int spng_set_time(spng_ctx *ctx, struct spng_time *time);
+SPNG_API int spng_set_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t n_chunks);
+
+/* Official extensions */
+SPNG_API int spng_set_offs(spng_ctx *ctx, struct spng_offs *offs);
+SPNG_API int spng_set_exif(spng_ctx *ctx, struct spng_exif *exif);
+
+
+SPNG_API const char *spng_strerror(int err);
+SPNG_API const char *spng_version_string(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SPNG_H */
diff --git a/cmake/externals/opencv.cmake b/cmake/externals/opencv.cmake
index b972266ac..e3fa7e904 100644
--- a/cmake/externals/opencv.cmake
+++ b/cmake/externals/opencv.cmake
@@ -1,167 +1,16 @@
-set(BUILD_ANDROID_EXAMPLES      OFF CACHE INTERNAL "")
-set(BUILD_ANDROID_PROJECTS      OFF CACHE INTERNAL "")
-set(BUILD_ANDROID_SERVICE       OFF CACHE INTERNAL "")
-set(BUILD_DOCS                  OFF CACHE INTERNAL "")
-set(BUILD_FAT_JAVA_LIB          OFF CACHE INTERNAL "")
-set(BUILD_IPP_IW                OFF CACHE INTERNAL "")
-set(BUILD_ITT                   OFF CACHE INTERNAL "")
-set(BUILD_JASPER                OFF CACHE INTERNAL "")
-set(BUILD_JAVA                  OFF CACHE INTERNAL "")
-set(BUILD_JPEG                  OFF CACHE INTERNAL "")
-set(BUILD_OBJC                  OFF CACHE INTERNAL "")
-set(BUILD_OPENJPEG              OFF CACHE INTERNAL "")
-set(BUILD_PNG                   OFF CACHE INTERNAL "")
-set(BUILD_opencv_apps           OFF CACHE INTERNAL "")
-set(BUILD_opencv_calib3d        OFF CACHE INTERNAL "")
-set(BUILD_opencv_dnn            OFF CACHE INTERNAL "")
-set(BUILD_opencv_features2d     OFF CACHE INTERNAL "")
-set(BUILD_opencv_flann          OFF CACHE INTERNAL "")
-set(BUILD_opencv_gapi           OFF CACHE INTERNAL "")
-set(BUILD_opencv_highgui        OFF CACHE INTERNAL "")
-set(BUILD_opencv_imgcodecs      OFF CACHE INTERNAL "")
-set(BUILD_opencv_java           OFF CACHE INTERNAL "")
-set(BUILD_opencv_js             OFF CACHE INTERNAL "")
-set(BUILD_opencv_ml             OFF CACHE INTERNAL "")
-set(BUILD_opencv_objc           OFF CACHE INTERNAL "")
-set(BUILD_opencv_objdetect      OFF CACHE INTERNAL "")
-set(BUILD_opencv_photo          OFF CACHE INTERNAL "")
-set(BUILD_opencv_python2        OFF CACHE INTERNAL "")
-set(BUILD_opencv_python3        OFF CACHE INTERNAL "")
-set(BUILD_opencv_stitching      OFF CACHE INTERNAL "")
-set(BUILD_opencv_ts             OFF CACHE INTERNAL "")
-set(BUILD_opencv_video          OFF CACHE INTERNAL "")
-set(BUILD_opencv_videoio        OFF CACHE INTERNAL "")
-set(BUILD_opencv_world          OFF CACHE INTERNAL "")
-set(BUILD_OPENEXR               OFF CACHE INTERNAL "")
-set(BUILD_TBB                   OFF CACHE INTERNAL "")
-set(BUILD_TIFF                  OFF CACHE INTERNAL "")
-set(BUILD_WEBP                  OFF CACHE INTERNAL "")
-set(BUILD_WITH_STATIC_CRT       OFF CACHE INTERNAL "")
-if(OCOS_BUILD_APPLE_FRAMEWORK)
-  # tell OpenCV to build zlib so we can link to the static library
-  set(BUILD_ZLIB                ON  CACHE INTERNAL "")
-else()
-  set(BUILD_ZLIB                OFF CACHE INTERNAL "")
-endif()
-set(ENABLE_FAST_MATH            OFF CACHE INTERNAL "")
-set(ENABLE_PRECOMPILED_HEADERS  OFF CACHE INTERNAL "")
-set(WITH_ANDROID_MEDIANDK       OFF CACHE INTERNAL "")
-set(WITH_AVFOUNDATION           OFF CACHE INTERNAL "")
-set(WITH_CAP_IOS                OFF CACHE INTERNAL "")
-set(WITH_CAROTENE               OFF CACHE INTERNAL "")
-set(WITH_CLP                    OFF CACHE INTERNAL "")
-set(WITH_CPUFEATURES            OFF CACHE INTERNAL "")
-set(WITH_DIRECTX                OFF CACHE INTERNAL "")
-set(WITH_DSHOW                  OFF CACHE INTERNAL "")
-set(WITH_EIGEN                  OFF CACHE INTERNAL "")
-set(WITH_FFMPEG                 OFF CACHE INTERNAL "")
-set(WITH_GDCM                   OFF CACHE INTERNAL "")
-set(WITH_GSTREAMER              OFF CACHE INTERNAL "")
-set(WITH_GTK                    OFF CACHE INTERNAL "")
-set(WITH_HALIDE                 OFF CACHE INTERNAL "")
-set(WITH_HPX                    OFF CACHE INTERNAL "")
-set(WITH_IMGCODEC_HDR           OFF CACHE INTERNAL "")
-set(WITH_IMGCODEC_PFM           OFF CACHE INTERNAL "")
-set(WITH_IMGCODEC_PXM           OFF CACHE INTERNAL "")
-set(WITH_IMGCODEC_SUNRASTER     OFF CACHE INTERNAL "")
-set(WITH_INF_ENGINE             OFF CACHE INTERNAL "")
-set(WITH_IPP                    OFF CACHE INTERNAL "")
-set(WITH_ITT                    OFF CACHE INTERNAL "")
-set(WITH_JASPER                 OFF CACHE INTERNAL "")
-set(WITH_JPEG                   OFF CACHE INTERNAL "")
-set(WITH_MSMF                   OFF CACHE INTERNAL "")
-set(WITH_NGRAPH                 OFF CACHE INTERNAL "")
-set(WITH_ONNX                   OFF CACHE INTERNAL "")
-set(WITH_OPENCL                 OFF CACHE INTERNAL "")
-set(WITH_OPENCL_SVM             OFF CACHE INTERNAL "")
-set(WITH_OPENEXR                OFF CACHE INTERNAL "")
-set(WITH_OPENJPEG               OFF CACHE INTERNAL "")
-set(WITH_OPENMP                 OFF CACHE INTERNAL "")
-set(WITH_OPENVX                 OFF CACHE INTERNAL "")
-set(WITH_PNG                    OFF CACHE INTERNAL "")
-set(WITH_PROTOBUF               OFF CACHE INTERNAL "")
-set(WITH_PTHREADS_PF            OFF CACHE INTERNAL "")
-set(WITH_QUIRC                  OFF CACHE INTERNAL "")
-set(WITH_TBB                    OFF CACHE INTERNAL "")
-set(WITH_TENGINE                OFF CACHE INTERNAL "")
-set(WITH_TIFF                   OFF CACHE INTERNAL "")
-set(WITH_V4L                    OFF CACHE INTERNAL "")
-set(WITH_VULKAN                 OFF CACHE INTERNAL "")
-set(WITH_WEBP                   OFF CACHE INTERNAL "")
-set(WITH_WIN32UI                OFF CACHE INTERNAL "")
-
 if (OCOS_ENABLE_OPENCV_CODECS)
-  set(BUILD_opencv_imgcodecs  ON CACHE INTERNAL "")
-
-  set(BUILD_JPEG              ON CACHE INTERNAL "")
-  set(BUILD_PNG               ON CACHE INTERNAL "")
-
-  set(WITH_JPEG               ON CACHE INTERNAL "")
-  set(WITH_PNG                ON CACHE INTERNAL "")
-endif()
-
-set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "")
-set(BUILD_DOCS        OFF CACHE INTERNAL "")
-set(BUILD_EXAMPLES    OFF CACHE INTERNAL "")
-set(BUILD_TESTS       OFF CACHE INTERNAL "")
-
-if(IOS)
-  # copy what OpenCV's platforms/ios/build_framework.py does and set CPU_BASELINE=DETECT
-  # https://github.com/opencv/opencv/blob/4223495e6cd67011f86b8ecd9be1fa105018f3b1/platforms/ios/build_framework.py#L253
-  set(CPU_BASELINE DETECT)
-endif()
+    # Include the subdirectories for libspng and libjpeg
+    add_subdirectory(${PROJECT_SOURCE_DIR}/cmake/externals/libspng)
+    add_subdirectory(${PROJECT_SOURCE_DIR}/cmake/externals/libjpeg)
 
-if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
-  # error   _png_do_read_transformations in liblibpng.a(pngrtran.c.o) not found for architecture arm64
-  # workaround to disable NEON optimizations
-  add_definitions(-DPNG_ARM_NEON_IMPLEMENTATION=0)
-  add_definitions(-DPNG_ARM_NEON_OPT=0)
-endif()
-
-if (MSVC AND CMAKE_GENERATOR_PLATFORM)
-  string(TOLOWER ${CMAKE_GENERATOR_PLATFORM} _GEN_PLATFORM)
-  if (${_GEN_PLATFORM} MATCHES "arm|arm64")
-    set(OPENCV_SKIP_SYSTEM_PROCESSOR_DETECTION ON)
-  endif()
-endif()
-
-FetchContent_Declare(
-    opencv
-    GIT_REPOSITORY https://github.com/opencv/opencv.git
-    GIT_TAG        4.5.4
-    GIT_SHALLOW    TRUE
-    -DBUILD_DOCS:BOOL=FALSE
-    -DBUILD_EXAMPLES:BOOL=FALSE
-    -DBUILD_TESTS:BOOL=FALSE
-    -DBUILD_SHARED_LIBS:BOOL=FALSE
-    -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_CURRENT_BINARY_DIR}/opencv
-    -DCV_TRACE:BOOL=FALSE
-    PATCH_COMMAND git checkout . && git apply --whitespace=fix --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/cmake/externals/opencv-no-rtti.patch
-)
-
-FetchContent_MakeAvailable(opencv)
-set(opencv_INCLUDE_DIRS "")
-list(APPEND opencv_INCLUDE_DIRS ${OPENCV_CONFIG_FILE_INCLUDE_DIR})
-list(APPEND opencv_INCLUDE_DIRS
-    ${OPENCV_MODULE_opencv_core_LOCATION}/include
-    ${OPENCV_MODULE_opencv_imgproc_LOCATION}/include)
-set(opencv_LIBS "")
-list(APPEND opencv_LIBS opencv_core opencv_imgproc)
-
-if (OCOS_ENABLE_OPENCV_CODECS)
-    list(APPEND opencv_INCLUDE_DIRS ${OPENCV_MODULE_opencv_imgcodecs_LOCATION}/include)
-    list(APPEND opencv_LIBS opencv_imgcodecs)
-endif()
+    # Specify where the static libraries are built
+    set(LIBRARY_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/cvout)
 
-# unset it to avoid affecting other projects.
-unset(EXECUTABLE_OUTPUT_PATH CACHE)
+    # Add the libraries
+    add_library(libspng STATIC IMPORTED)
+    add_library(libjpeg STATIC IMPORTED)
 
-if (CMAKE_SYSTEM_NAME MATCHES "Windows")
-    set(opencv_projs gen_opencv_java_source gen_opencv_js_source gen_opencv_python_source)
-    list(APPEND opencv_projs gen_opencv_objc_source gen_opencv_objc_source_ios gen_opencv_objc_source_osx)
-    list(APPEND opencv_projs opencv_highgui_plugins opencv_videoio_plugins)
-    foreach(p ${opencv_projs})
-        set_target_properties(${p} PROPERTIES FOLDER "externals/opencv")
-        set_target_properties(${p} PROPERTIES EXCLUDE_FROM_ALL TRUE EXCLUDE_FROM_DEFAULT_BUILD TRUE)
-    endforeach()
-endif()
+    # Set the properties for the libraries
+    set_property(TARGET libspng PROPERTY IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/cmake/externals/cvout/libspng.a)
+    set_property(TARGET libjpeg PROPERTY IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/cmake/externals/cvout/libjpeg.a)
+endif()
\ No newline at end of file

From 181dc854091b06266469d620a7a8cf3b713c09cd Mon Sep 17 00:00:00 2001
From: Sayan Shaw <sayanshaw@microsoft.com>
Date: Wed, 15 May 2024 13:26:53 -0700
Subject: [PATCH 2/3] try with fetch content populate

---
 cmake/externals/cvout/.gitkeep         |    0
 cmake/externals/libjpeg/CMakeLists.txt |   30 -
 cmake/externals/libjpeg/README         |  371 --
 cmake/externals/libjpeg/change.log     |  502 --
 cmake/externals/libjpeg/jaricom.c      |  153 -
 cmake/externals/libjpeg/jcapimin.c     |  288 -
 cmake/externals/libjpeg/jcapistd.c     |  162 -
 cmake/externals/libjpeg/jcarith.c      |  945 ----
 cmake/externals/libjpeg/jccoefct.c     |  456 --
 cmake/externals/libjpeg/jccolor.c      |  598 --
 cmake/externals/libjpeg/jcdctmgr.c     |  466 --
 cmake/externals/libjpeg/jchuff.c       | 1656 ------
 cmake/externals/libjpeg/jcinit.c       |  249 -
 cmake/externals/libjpeg/jcmainct.c     |  297 -
 cmake/externals/libjpeg/jcmarker.c     |  717 ---
 cmake/externals/libjpeg/jcmaster.c     |  675 ---
 cmake/externals/libjpeg/jcomapi.c      |  244 -
 cmake/externals/libjpeg/jconfig.h      |   64 -
 cmake/externals/libjpeg/jcparam.c      |  591 --
 cmake/externals/libjpeg/jcprepct.c     |  358 --
 cmake/externals/libjpeg/jcsample.c     |  545 --
 cmake/externals/libjpeg/jctrans.c      |  399 --
 cmake/externals/libjpeg/jdapimin.c     |  412 --
 cmake/externals/libjpeg/jdapistd.c     |  276 -
 cmake/externals/libjpeg/jdarith.c      |  796 ---
 cmake/externals/libjpeg/jdatadst.c     |  263 -
 cmake/externals/libjpeg/jdatasrc.c     |  271 -
 cmake/externals/libjpeg/jdcoefct.c     |  744 ---
 cmake/externals/libjpeg/jdcolor.c      |  769 ---
 cmake/externals/libjpeg/jdct.h         |  409 --
 cmake/externals/libjpeg/jddctmgr.c     |  384 --
 cmake/externals/libjpeg/jdhuff.c       | 1559 ------
 cmake/externals/libjpeg/jdinput.c      |  657 ---
 cmake/externals/libjpeg/jdmainct.c     |  511 --
 cmake/externals/libjpeg/jdmarker.c     | 1505 -----
 cmake/externals/libjpeg/jdmaster.c     |  532 --
 cmake/externals/libjpeg/jdmerge.c      |  437 --
 cmake/externals/libjpeg/jdpostct.c     |  290 -
 cmake/externals/libjpeg/jdsample.c     |  341 --
 cmake/externals/libjpeg/jdtrans.c      |  140 -
 cmake/externals/libjpeg/jerror.c       |  253 -
 cmake/externals/libjpeg/jerror.h       |  304 --
 cmake/externals/libjpeg/jfdctflt.c     |  176 -
 cmake/externals/libjpeg/jfdctfst.c     |  232 -
 cmake/externals/libjpeg/jfdctint.c     | 4415 ---------------
 cmake/externals/libjpeg/jidctflt.c     |  238 -
 cmake/externals/libjpeg/jidctfst.c     |  351 --
 cmake/externals/libjpeg/jidctint.c     | 5240 ------------------
 cmake/externals/libjpeg/jinclude.h     |  157 -
 cmake/externals/libjpeg/jmemansi.c     |  167 -
 cmake/externals/libjpeg/jmemmgr.c      | 1115 ----
 cmake/externals/libjpeg/jmemnobs.c     |  113 -
 cmake/externals/libjpeg/jmemsys.h      |  198 -
 cmake/externals/libjpeg/jmorecfg.h     |  457 --
 cmake/externals/libjpeg/jpegint.h      |  445 --
 cmake/externals/libjpeg/jpeglib.h      | 1183 ----
 cmake/externals/libjpeg/jquant1.c      |  851 ---
 cmake/externals/libjpeg/jquant2.c      | 1311 -----
 cmake/externals/libjpeg/jutils.c       |  224 -
 cmake/externals/libjpeg/jversion.h     |   14 -
 cmake/externals/libspng/CMakeLists.txt |   39 -
 cmake/externals/libspng/LICENSE        |   25 -
 cmake/externals/libspng/spng.c         | 6980 ------------------------
 cmake/externals/libspng/spng.h         |  537 --
 cmake/externals/opencv.cmake           |  179 +-
 65 files changed, 167 insertions(+), 45099 deletions(-)
 delete mode 100644 cmake/externals/cvout/.gitkeep
 delete mode 100644 cmake/externals/libjpeg/CMakeLists.txt
 delete mode 100644 cmake/externals/libjpeg/README
 delete mode 100644 cmake/externals/libjpeg/change.log
 delete mode 100644 cmake/externals/libjpeg/jaricom.c
 delete mode 100644 cmake/externals/libjpeg/jcapimin.c
 delete mode 100644 cmake/externals/libjpeg/jcapistd.c
 delete mode 100644 cmake/externals/libjpeg/jcarith.c
 delete mode 100644 cmake/externals/libjpeg/jccoefct.c
 delete mode 100644 cmake/externals/libjpeg/jccolor.c
 delete mode 100644 cmake/externals/libjpeg/jcdctmgr.c
 delete mode 100644 cmake/externals/libjpeg/jchuff.c
 delete mode 100644 cmake/externals/libjpeg/jcinit.c
 delete mode 100644 cmake/externals/libjpeg/jcmainct.c
 delete mode 100644 cmake/externals/libjpeg/jcmarker.c
 delete mode 100644 cmake/externals/libjpeg/jcmaster.c
 delete mode 100644 cmake/externals/libjpeg/jcomapi.c
 delete mode 100644 cmake/externals/libjpeg/jconfig.h
 delete mode 100644 cmake/externals/libjpeg/jcparam.c
 delete mode 100644 cmake/externals/libjpeg/jcprepct.c
 delete mode 100644 cmake/externals/libjpeg/jcsample.c
 delete mode 100644 cmake/externals/libjpeg/jctrans.c
 delete mode 100644 cmake/externals/libjpeg/jdapimin.c
 delete mode 100644 cmake/externals/libjpeg/jdapistd.c
 delete mode 100644 cmake/externals/libjpeg/jdarith.c
 delete mode 100644 cmake/externals/libjpeg/jdatadst.c
 delete mode 100644 cmake/externals/libjpeg/jdatasrc.c
 delete mode 100644 cmake/externals/libjpeg/jdcoefct.c
 delete mode 100644 cmake/externals/libjpeg/jdcolor.c
 delete mode 100644 cmake/externals/libjpeg/jdct.h
 delete mode 100644 cmake/externals/libjpeg/jddctmgr.c
 delete mode 100644 cmake/externals/libjpeg/jdhuff.c
 delete mode 100644 cmake/externals/libjpeg/jdinput.c
 delete mode 100644 cmake/externals/libjpeg/jdmainct.c
 delete mode 100644 cmake/externals/libjpeg/jdmarker.c
 delete mode 100644 cmake/externals/libjpeg/jdmaster.c
 delete mode 100644 cmake/externals/libjpeg/jdmerge.c
 delete mode 100644 cmake/externals/libjpeg/jdpostct.c
 delete mode 100644 cmake/externals/libjpeg/jdsample.c
 delete mode 100644 cmake/externals/libjpeg/jdtrans.c
 delete mode 100644 cmake/externals/libjpeg/jerror.c
 delete mode 100644 cmake/externals/libjpeg/jerror.h
 delete mode 100644 cmake/externals/libjpeg/jfdctflt.c
 delete mode 100644 cmake/externals/libjpeg/jfdctfst.c
 delete mode 100644 cmake/externals/libjpeg/jfdctint.c
 delete mode 100644 cmake/externals/libjpeg/jidctflt.c
 delete mode 100644 cmake/externals/libjpeg/jidctfst.c
 delete mode 100644 cmake/externals/libjpeg/jidctint.c
 delete mode 100644 cmake/externals/libjpeg/jinclude.h
 delete mode 100644 cmake/externals/libjpeg/jmemansi.c
 delete mode 100644 cmake/externals/libjpeg/jmemmgr.c
 delete mode 100644 cmake/externals/libjpeg/jmemnobs.c
 delete mode 100644 cmake/externals/libjpeg/jmemsys.h
 delete mode 100644 cmake/externals/libjpeg/jmorecfg.h
 delete mode 100644 cmake/externals/libjpeg/jpegint.h
 delete mode 100644 cmake/externals/libjpeg/jpeglib.h
 delete mode 100644 cmake/externals/libjpeg/jquant1.c
 delete mode 100644 cmake/externals/libjpeg/jquant2.c
 delete mode 100644 cmake/externals/libjpeg/jutils.c
 delete mode 100644 cmake/externals/libjpeg/jversion.h
 delete mode 100644 cmake/externals/libspng/CMakeLists.txt
 delete mode 100644 cmake/externals/libspng/LICENSE
 delete mode 100644 cmake/externals/libspng/spng.c
 delete mode 100644 cmake/externals/libspng/spng.h

diff --git a/cmake/externals/cvout/.gitkeep b/cmake/externals/cvout/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/cmake/externals/libjpeg/CMakeLists.txt b/cmake/externals/libjpeg/CMakeLists.txt
deleted file mode 100644
index 4e88164cc..000000000
--- a/cmake/externals/libjpeg/CMakeLists.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-# ----------------------------------------------------------------------------
-#  CMake file for libjpeg. See root CMakeLists.txt
-#
-# ----------------------------------------------------------------------------
-project(JPEG_LIBRARY)
-
-file(GLOB lib_srcs *.c)
-file(GLOB lib_hdrs *.h)
-
-# ----------------------------------------------------------------------------------
-#         Define the library target:
-# ----------------------------------------------------------------------------------
-
-add_library(JPEG_LIBRARY STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs})
-
-if(CV_GCC OR CV_CLANG)
-  set_source_files_properties(jcdctmgr.c PROPERTIES COMPILE_FLAGS "-O1")
-endif()
-
-set_target_properties(JPEG_LIBRARY
-  PROPERTIES OUTPUT_NAME JPEG_LIBRARY
-  DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
-  COMPILE_PDB_NAME JPEG_LIBRARY
-  COMPILE_PDB_NAME_DEBUG "JPEG_LIBRARY${OPENCV_DEBUG_POSTFIX}"
-  ARCHIVE_OUTPUT_DIRECTORY "${3P_LIBRARY_OUTPUT_PATH}"
-  )
-
-if(ENABLE_SOLUTION_FOLDERS)
-  set_target_properties(JPEG_LIBRARY PROPERTIES FOLDER "3rdparty")
-endif()
diff --git a/cmake/externals/libjpeg/README b/cmake/externals/libjpeg/README
deleted file mode 100644
index 13b245c5c..000000000
--- a/cmake/externals/libjpeg/README
+++ /dev/null
@@ -1,371 +0,0 @@
-The Independent JPEG Group's JPEG software
-==========================================
-
-README for release 9d of 12-Jan-2020
-====================================
-
-This distribution contains the ninth public release of the Independent JPEG
-Group's free JPEG software.  You are welcome to redistribute this software and
-to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
-
-This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
-Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
-John Korejwa, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi,
-Ge' Weijers, and other members of the Independent JPEG Group.
-
-IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
-(previously known as JPEG, together with ITU-T SG16).
-
-
-DOCUMENTATION ROADMAP
-=====================
-
-This file contains the following sections:
-
-OVERVIEW            General description of JPEG and the IJG software.
-LEGAL ISSUES        Copyright, lack of warranty, terms of distribution.
-REFERENCES          Where to learn more about JPEG.
-ARCHIVE LOCATIONS   Where to find newer versions of this software.
-ACKNOWLEDGMENTS     Special thanks.
-FILE FORMAT WARS    Software *not* to get.
-TO DO               Plans for future IJG releases.
-
-Other documentation files in the distribution are:
-
-User documentation:
-  install.txt       How to configure and install the IJG software.
-  usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
-                    rdjpgcom, and wrjpgcom.
-  *.1               Unix-style man pages for programs (same info as usage.txt).
-  wizard.txt        Advanced usage instructions for JPEG wizards only.
-  change.log        Version-to-version change highlights.
-Programmer and internal documentation:
-  libjpeg.txt       How to use the JPEG library in your own programs.
-  example.c         Sample code for calling the JPEG library.
-  structure.txt     Overview of the JPEG library's internal structure.
-  filelist.txt      Road map of IJG files.
-  coderules.txt     Coding style rules --- please read if you contribute code.
-
-Please read at least the files install.txt and usage.txt.  Some information
-can also be found in the JPEG FAQ (Frequently Asked Questions) article.  See
-ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
-
-If you want to understand how the JPEG code works, we suggest reading one or
-more of the REFERENCES, then looking at the documentation files (in roughly
-the order listed) before diving into the code.
-
-
-OVERVIEW
-========
-
-This package contains C software to implement JPEG image encoding, decoding,
-and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
-method for full-color and grayscale images.
-
-This software implements JPEG baseline, extended-sequential, and progressive
-compression processes.  Provision is made for supporting all variants of these
-processes, although some uncommon parameter settings aren't implemented yet.
-We have made no provision for supporting the hierarchical or lossless
-processes defined in the standard.
-
-We provide a set of library routines for reading and writing JPEG image files,
-plus two sample applications "cjpeg" and "djpeg", which use the library to
-perform conversion between JPEG and some other popular image file formats.
-The library is intended to be reused in other applications.
-
-In order to support file conversion and viewing software, we have included
-considerable functionality beyond the bare JPEG coding/decoding capability;
-for example, the color quantization modules are not strictly part of JPEG
-decoding, but they are essential for output to colormapped file formats or
-colormapped displays.  These extra functions can be compiled out of the
-library if not required for a particular application.
-
-We have also included "jpegtran", a utility for lossless transcoding between
-different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
-applications for inserting and extracting textual comments in JFIF files.
-
-The emphasis in designing this software has been on achieving portability and
-flexibility, while also making it fast enough to be useful.  In particular,
-the software is not intended to be read as a tutorial on JPEG.  (See the
-REFERENCES section for introductory material.)  Rather, it is intended to
-be reliable, portable, industrial-strength code.  We do not claim to have
-achieved that goal in every aspect of the software, but we strive for it.
-
-We welcome the use of this software as a component of commercial products.
-No royalty is required, but we do ask for an acknowledgement in product
-documentation, as described under LEGAL ISSUES.
-
-
-LEGAL ISSUES
-============
-
-In plain English:
-
-1. We don't promise that this software works.  (But if you find any bugs,
-   please let us know!)
-2. You can use this software for whatever you want.  You don't have to pay us.
-3. You may not pretend that you wrote this software.  If you use it in a
-   program, you must acknowledge somewhere in your documentation that
-   you've used the IJG code.
-
-In legalese:
-
-The authors make NO WARRANTY or representation, either express or implied,
-with respect to this software, its quality, accuracy, merchantability, or
-fitness for a particular purpose.  This software is provided "AS IS", and you,
-its user, assume the entire risk as to its quality and accuracy.
-
-This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
-All Rights Reserved except as specified below.
-
-Permission is hereby granted to use, copy, modify, and distribute this
-software (or portions thereof) for any purpose, without fee, subject to these
-conditions:
-(1) If any part of the source code for this software is distributed, then this
-README file must be included, with this copyright and no-warranty notice
-unaltered; and any additions, deletions, or changes to the original files
-must be clearly indicated in accompanying documentation.
-(2) If only executable code is distributed, then the accompanying
-documentation must state that "this software is based in part on the work of
-the Independent JPEG Group".
-(3) Permission for use of this software is granted only if the user accepts
-full responsibility for any undesirable consequences; the authors accept
-NO LIABILITY for damages of any kind.
-
-These conditions apply to any software derived from or based on the IJG code,
-not just to the unmodified library.  If you use our work, you ought to
-acknowledge us.
-
-Permission is NOT granted for the use of any IJG author's name or company name
-in advertising or publicity relating to this software or products derived from
-it.  This software may be referred to only as "the Independent JPEG Group's
-software".
-
-We specifically permit and encourage the use of this software as the basis of
-commercial products, provided that all warranty or liability claims are
-assumed by the product vendor.
-
-
-The Unix configuration script "configure" was produced with GNU Autoconf.
-It is copyright by the Free Software Foundation but is freely distributable.
-The same holds for its supporting scripts (config.guess, config.sub,
-ltmain.sh).  Another support script, install-sh, is copyright by X Consortium
-but is also freely distributable.
-
-
-REFERENCES
-==========
-
-We recommend reading one or more of these references before trying to
-understand the innards of the JPEG software.
-
-The best short technical introduction to the JPEG compression algorithm is
-	Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
-	Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44.
-(Adjacent articles in that issue discuss MPEG motion picture compression,
-applications of JPEG, and related topics.)  If you don't have the CACM issue
-handy, a PDF file containing a revised version of Wallace's article is
-available at http://www.ijg.org/files/Wallace.JPEG.pdf.  The file (actually
-a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
-omits the sample images that appeared in CACM, but it includes corrections
-and some added material.  Note: the Wallace article is copyright ACM and IEEE,
-and it may not be used for commercial purposes.
-
-A somewhat less technical, more leisurely introduction to JPEG can be found in
-"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by
-M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1.  This book provides
-good explanations and example C code for a multitude of compression methods
-including JPEG.  It is an excellent source if you are comfortable reading C
-code but don't know much about data compression in general.  The book's JPEG
-sample code is far from industrial-strength, but when you are ready to look
-at a full implementation, you've got one here...
-
-The best currently available description of JPEG is the textbook "JPEG Still
-Image Data Compression Standard" by William B. Pennebaker and Joan L.
-Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
-Price US$59.95, 638 pp.  The book includes the complete text of the ISO JPEG
-standards (DIS 10918-1 and draft DIS 10918-2).
-Although this is by far the most detailed and comprehensive exposition of
-JPEG publicly available, we point out that it is still missing an explanation
-of the most essential properties and algorithms of the underlying DCT
-technology.
-If you think that you know about DCT-based JPEG after reading this book,
-then you are in delusion.  The real fundamentals and corresponding potential
-of DCT-based JPEG are not publicly known so far, and that is the reason for
-all the mistaken developments taking place in the image coding domain.
-
-The original JPEG standard is divided into two parts, Part 1 being the actual
-specification, while Part 2 covers compliance testing methods.  Part 1 is
-titled "Digital Compression and Coding of Continuous-tone Still Images,
-Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
-10918-1, ITU-T T.81.  Part 2 is titled "Digital Compression and Coding of
-Continuous-tone Still Images, Part 2: Compliance testing" and has document
-numbers ISO/IEC IS 10918-2, ITU-T T.83.
-IJG JPEG 8 introduced an implementation of the JPEG SmartScale extension
-which is specified in two documents:  A contributed document at ITU and ISO
-with title "ITU-T JPEG-Plus Proposal for Extending ITU-T T.81 for Advanced
-Image Coding", April 2006, Geneva, Switzerland.  The latest version of this
-document is Revision 3.  And a contributed document ISO/IEC JTC1/SC29/WG1 N
-5799 with title "Evolution of JPEG", June/July 2011, Berlin, Germany.
-IJG JPEG 9 introduces a reversible color transform for improved lossless
-compression which is described in a contributed document ISO/IEC JTC1/SC29/
-WG1 N 6080 with title "JPEG 9 Lossless Coding", June/July 2012, Paris,
-France.
-
-The JPEG standard does not specify all details of an interchangeable file
-format.  For the omitted details we follow the "JFIF" conventions, version 2.
-JFIF version 1 has been adopted as Recommendation ITU-T T.871 (05/2011) :
-Information technology - Digital compression and coding of continuous-tone
-still images: JPEG File Interchange Format (JFIF).  It is available as a
-free download in PDF file format from http://www.itu.int/rec/T-REC-T.871.
-A PDF file of the older JFIF document is available at
-http://www.w3.org/Graphics/JPEG/jfif3.pdf.
-
-The TIFF 6.0 file format specification can be obtained by FTP from
-ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation scheme
-found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
-IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
-Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
-(Compression tag 7).  Copies of this Note can be obtained from
-http://www.ijg.org/files/.  It is expected that the next revision
-of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
-Although IJG's own code does not support TIFF/JPEG, the free libtiff library
-uses our library to implement TIFF/JPEG per the Note.
-
-
-ARCHIVE LOCATIONS
-=================
-
-The "official" archive site for this software is www.ijg.org.
-The most recent released version can always be found there in
-directory "files".  This particular version will be archived as
-http://www.ijg.org/files/jpegsrc.v9d.tar.gz, and in Windows-compatible
-"zip" archive format as http://www.ijg.org/files/jpegsr9d.zip.
-
-The JPEG FAQ (Frequently Asked Questions) article is a source of some
-general information about JPEG.
-It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
-and other news.answers archive sites, including the official news.answers
-archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
-If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu
-with body
-	send usenet/news.answers/jpeg-faq/part1
-	send usenet/news.answers/jpeg-faq/part2
-
-
-ACKNOWLEDGMENTS
-===============
-
-Thank to Juergen Bruder for providing me with a copy of the common DCT
-algorithm article, only to find out that I had come to the same result
-in a more direct and comprehensible way with a more generative approach.
-
-Thank to Istvan Sebestyen and Joan L. Mitchell for inviting me to the
-ITU JPEG (Study Group 16) meeting in Geneva, Switzerland.
-
-Thank to Thomas Wiegand and Gary Sullivan for inviting me to the
-Joint Video Team (MPEG & ITU) meeting in Geneva, Switzerland.
-
-Thank to Thomas Richter and Daniel Lee for inviting me to the
-ISO/IEC JTC1/SC29/WG1 (previously known as JPEG, together with ITU-T SG16)
-meeting in Berlin, Germany.
-
-Thank to John Korejwa and Massimo Ballerini for inviting me to
-fruitful consultations in Boston, MA and Milan, Italy.
-
-Thank to Hendrik Elstner, Roland Fassauer, Simone Zuck, Guenther
-Maier-Gerber, Walter Stoeber, Fred Schmitz, and Norbert Braunagel
-for corresponding business development.
-
-Thank to Nico Zschach and Dirk Stelling of the technical support team
-at the Digital Images company in Halle for providing me with extra
-equipment for configuration tests.
-
-Thank to Richard F. Lyon (then of Foveon Inc.) for fruitful
-communication about JPEG configuration in Sigma Photo Pro software.
-
-Thank to Andrew Finkenstadt for hosting the ijg.org site.
-
-Thank to Thomas G. Lane for the original design and development of
-this singular software package.
-
-Thank to Lars Goehler, Andreas Heinecke, Sebastian Fuss, Yvonne Roebert,
-Andrej Werner, and Ulf-Dietrich Braumann for support and public relations.
-
-
-FILE FORMAT WARS
-================
-
-The ISO/IEC JTC1/SC29/WG1 standards committee (previously known as JPEG,
-together with ITU-T SG16) currently promotes different formats containing
-the name "JPEG" which is misleading because these formats are incompatible
-with original DCT-based JPEG and are based on faulty technologies.
-IJG therefore does not and will not support such momentary mistakes
-(see REFERENCES).
-There exist also distributions under the name "OpenJPEG" promoting such
-kind of formats which is misleading because they don't support original
-JPEG images.
-We have no sympathy for the promotion of inferior formats.  Indeed, one of
-the original reasons for developing this free software was to help force
-convergence on common, interoperable format standards for JPEG files.
-Don't use an incompatible file format!
-(In any case, our decoder will remain capable of reading existing JPEG
-image files indefinitely.)
-
-The ISO committee pretends to be "responsible for the popular JPEG" in their
-public reports which is not true because they don't respond to actual
-requirements for the maintenance of the original JPEG specification.
-Furthermore, the ISO committee pretends to "ensure interoperability" with
-their standards which is not true because their "standards" support only
-application-specific and proprietary use cases and contain mathematically
-incorrect code.
-
-There are currently different distributions in circulation containing the
-name "libjpeg" which is misleading because they don't have the features and
-are incompatible with formats supported by actual IJG libjpeg distributions.
-One of those fakes is released by members of the ISO committee and just uses
-the name of libjpeg for misdirection of people, similar to the abuse of the
-name JPEG as described above, while having nothing in common with actual IJG
-libjpeg distributions and containing mathematically incorrect code.
-The other one claims to be a "derivative" or "fork" of the original libjpeg,
-but violates the license conditions as described under LEGAL ISSUES above
-and violates basic C programming properties.
-We have no sympathy for the release of misleading, incorrect and illegal
-distributions derived from obsolete code bases.
-Don't use an obsolete code base!
-
-According to the UCC (Uniform Commercial Code) law, IJG has the lawful and
-legal right to foreclose on certain standardization bodies and other
-institutions or corporations that knowingly perform substantial and
-systematic deceptive acts and practices, fraud, theft, and damaging of the
-value of the people of this planet without their knowing, willing and
-intentional consent.
-The titles, ownership, and rights of these institutions and all their assets
-are now duly secured and held in trust for the free people of this planet.
-People of the planet, on every country, may have a financial interest in
-the assets of these former principals, agents, and beneficiaries of the
-foreclosed institutions and corporations.
-IJG asserts what is: that each man, woman, and child has unalienable value
-and rights granted and deposited in them by the Creator and not any one of
-the people is subordinate to any artificial principality, corporate fiction
-or the special interest of another without their appropriate knowing,
-willing and intentional consent made by contract or accommodation agreement.
-IJG expresses that which already was.
-The people have already determined and demanded that public administration
-entities, national governments, and their supporting judicial systems must
-be fully transparent, accountable, and liable.
-IJG has secured the value for all concerned free people of the planet.
-
-A partial list of foreclosed institutions and corporations ("Hall of Shame")
-is currently prepared and will be published later.
-
-
-TO DO
-=====
-
-Version 9 is the second release of a new generation JPEG standard
-to overcome the limitations of the original JPEG specification,
-and is the first true source reference JPEG codec.
-More features are being prepared for coming releases...
-
-Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
diff --git a/cmake/externals/libjpeg/change.log b/cmake/externals/libjpeg/change.log
deleted file mode 100644
index ed8ec6afc..000000000
--- a/cmake/externals/libjpeg/change.log
+++ /dev/null
@@ -1,502 +0,0 @@
-CHANGE LOG for Independent JPEG Group's JPEG software
-
-
-Version 9d  12-Jan-2020
------------------------
-
-Optimize the optimal Huffman code table generation to produce
-slightly smaller files.  Thank to John Korejwa for suggestion.
-Note: Requires rebuild of testimgp.jpg.
-
-Decoding Huffman: Use default tables if tables are not defined.
-Thank to Simone Azzalin for report (Motion JPEG),
-and to Martin Strunz for hint.
-
-Add sanity check in optimal Huffman code table generation.
-Thank to Adam Farley for suggestion.
-
-rdtarga.c: use read_byte(), with EOF check, instead of getc()
-in read_*_pixel().
-Thank to Chijin Zhou for cjpeg potential vulnerability report.
-
-jmemnobs.c: respect the max_memory_to_use setting in
-jpeg_mem_available() computation.  Thank to Sheng Shu and
-Dongdong She for djpeg potential vulnerability report.
-
-jdarith.c, jdhuff.c: avoid left shift of negative value
-compiler warning in decode_mcu_AC_refine().
-Thank to Indu Bhagat for suggestion.
-
-Add x64 (64-bit) platform support, avoid compiler warnings.
-Thank to Jonathan Potter, Feiyun Wang, and Sheng Shu for suggestion.
-
-Adjust libjpeg version specification for pkg-config file.
-Thank to Chen Chen for suggestion.
-
-Restore GIF read and write support from libjpeg version 6a.
-Thank to Wolfgang Werner (W.W.) Heinz for suggestion.
-
-Improve consistency in raw (downsampled) image data processing mode.
-Thank to Zhongyuan Zhou for hint.
-
-Avoid out of bounds array read (AC derived table pointers)
-in start pass in jdhuff.c.  Thank to Peng Li for report.
-
-Improve code sanity (jdhuff.c).
-Thank to Reza Mirzazade farkhani for reports.
-
-Add jpegtran -drop option; add options to the crop extension and wipe
-to fill the extra area with content from the source image region,
-instead of gray out.
-
-
-Version 9c  14-Jan-2018
------------------------
-
-jpegtran: add an option to the -wipe switch to fill the region
-with the average of adjacent blocks, instead of gray out.
-Thank to Caitlyn Feddock and Maddie Ziegler for inspiration.
-
-Make range extension bits adjustable (in jpegint.h).
-Thank to Robin Watts for suggestion.
-
-Provide macros for fflush() and ferror() in jinclude.h in order
-to facilitate adaption by applications using an own FILE class.
-Thank to Gerhard Huber for suggestion.
-
-Add libjpeg pkg-config file.  Thank to Mark Lavi, Vincent Torri,
-Patrick McMunn, and Huw Davies for suggestion.
-
-Add sanity checks in cjpeg image reader modules.
-Thank to Bingchang, Liu for reports.
-
-
-Version 9b  17-Jan-2016
------------------------
-
-Improvements and optimizations in DCT and color calculations.
-Normalize range limit array composition and access pattern.
-Thank to Sia Furler and Maddie Ziegler for inspiration.
-
-Use merged upsample with scaled DCT sizes larger than 8.
-Thank to Taylor Hatala for inspiration.
-
-Check for excessive comment lengths in argument parsing in wrjpgcom.c.
-Thank to Julian Cohen for hint.
-
-Add makefile.b32 for use with Borland C++ 32-bit (bcc32).
-Thank to Joe Slater for contribution.
-
-Document 'f' specifier for jpegtran -crop specification.
-Thank to Michele Martone for suggestion.
-
-Use defined value from header instead of hardwired number in rdswitch.c.
-Thank to Robert Sprowson for hint.
-
-
-Version 9a  19-Jan-2014
------------------------
-
-Add support for wide gamut color spaces (JFIF version 2).
-Improve clarity and accuracy in color conversion modules.
-Note: Requires rebuild of test images.
-
-Extend the bit depth support to all values from 8 to 12
-(BITS_IN_JSAMPLE configuration option in jmorecfg.h).
-jpegtran now supports N bits sample data precision with all N from 8 to 12
-in a single instance.  Thank to Roland Fassauer for inspiration.
-
-Try to resolve issues with new boolean type definition.
-Thank also to v4hn for suggestion.
-
-Enable option to use default Huffman tables for lossless compression
-(for hardware solution), and in this case improve lossless RGB compression
-with reversible color transform.  Thank to Benny Alexandar for hint.
-
-Extend the entropy decoding structure, so that extraneous bytes between
-compressed scan data and following marker can be reported correctly.
-Thank to Nigel Tao for hint.
-
-Add jpegtran -wipe option and extension for -crop.
-Thank to Andrew Senior, David Clunie, and Josef Schmid for suggestion.
-
-
-Version 9  13-Jan-2013
-----------------------
-
-Add cjpeg -rgb1 option to create an RGB JPEG file, and insert
-a simple reversible color transform into the processing which
-significantly improves the compression.
-The recommended command for lossless coding of RGB images is now
-cjpeg -rgb1 -block 1 -arithmetic.
-As said, this option improves the compression significantly, but
-the files are not compatible with JPEG decoders prior to IJG v9
-due to the included color transform.
-The used color transform and marker signaling is compatible with
-other JPEG standards (e.g., JPEG-LS part 2).
-
-Remove the automatic de-ANSI-fication support (Automake 1.12).
-Thank also to Nitin A Kamble for suggestion.
-
-Add remark for jpeg_mem_dest() in jdatadst.c.
-Thank to Elie-Gregoire Khoury for the hint.
-
-Support files with invalid component identifiers (created
-by Adobe PDF).  Thank to Robin Watts for the suggestion.
-
-Adapt full buffer case in jcmainct.c for use with scaled DCT.
-Thank to Sergii Biloshytskyi for the suggestion.
-
-Add type identifier for declaration of noreturn functions.
-Thank to Brett L. Moore for the suggestion.
-
-Correct argument type in format string, avoid compiler warnings.
-Thank to Vincent Torri for hint.
-
-Add missing #include directives in configuration checks, avoid
-configuration errors.  Thank to John Spencer for the hint.
-
-
-Version 8d  15-Jan-2012
------------------------
-
-Add cjpeg -rgb option to create RGB JPEG files.
-Using this switch suppresses the conversion from RGB
-colorspace input to the default YCbCr JPEG colorspace.
-This feature allows true lossless JPEG coding of RGB color images.
-The recommended command for this purpose is currently
-cjpeg -rgb -block 1 -arithmetic.
-SmartScale capable decoder (introduced with IJG JPEG 8) required.
-Thank to Michael Koch for the initial suggestion.
-
-Add option to disable the region adjustment in the transupp crop code.
-Thank to Jeffrey Friedl for the suggestion.
-
-Thank to Richard Jones and Edd Dawson for various minor corrections.
-
-Thank to Akim Demaille for configure.ac cleanup.
-
-
-Version 8c  16-Jan-2011
------------------------
-
-Add option to compression library and cjpeg (-block N) to use
-different DCT block size.
-All N from 1 to 16 are possible.  Default is 8 (baseline format).
-Larger values produce higher compression,
-smaller values produce higher quality.
-SmartScale capable decoder (introduced with IJG JPEG 8) required.
-
-
-Version 8b  16-May-2010
------------------------
-
-Repair problem in new memory source manager with corrupt JPEG data.
-Thank to Ted Campbell and Samuel Chun for the report.
-
-Repair problem in Makefile.am test target.
-Thank to anonymous user for the report.
-
-Support MinGW installation with automatic configure.
-Thank to Volker Grabsch for the suggestion.
-
-
-Version 8a  28-Feb-2010
------------------------
-
-Writing tables-only datastreams via jpeg_write_tables works again.
-
-Support 32-bit BMPs (RGB image with Alpha channel) for read in cjpeg.
-Thank to Brett Blackham for the suggestion.
-
-Improve accuracy in floating point IDCT calculation.
-Thank to Robert Hooke for the hint.
-
-
-Version 8  10-Jan-2010
-----------------------
-
-jpegtran now supports the same -scale option as djpeg for "lossless" resize.
-An implementation of the JPEG SmartScale extension is required for this
-feature.  A (draft) specification of the JPEG SmartScale extension is
-available as a contributed document at ITU and ISO.  Revision 2 or later
-of the document is required (latest document version is Revision 3).
-The SmartScale extension will enable more features beside lossless resize
-in future implementations, as described in the document (new compression
-options).
-
-Add sanity check in BMP reader module to avoid cjpeg crash for empty input
-image (thank to Isaev Ildar of ISP RAS, Moscow, RU for reporting this error).
-
-Add data source and destination managers for read from and write to
-memory buffers.  New API functions jpeg_mem_src and jpeg_mem_dest.
-Thank to Roberto Boni from Italy for the suggestion.
-
-
-Version 7  27-Jun-2009
-----------------------
-
-New scaled DCTs implemented.
-djpeg now supports scalings N/8 with all N from 1 to 16.
-cjpeg now supports scalings 8/N with all N from 1 to 16.
-Scaled DCTs with size larger than 8 are now also used for resolving the
-common 2x2 chroma subsampling case without additional spatial resampling.
-Separate spatial resampling for those kind of files is now only necessary
-for N>8 scaling cases.
-Furthermore, separate scaled DCT functions are provided for direct resolving
-of the common asymmetric subsampling cases (2x1 and 1x2) without additional
-spatial resampling.
-
-cjpeg -quality option has been extended for support of separate quality
-settings for luminance and chrominance (or in general, for every provided
-quantization table slot).
-New API function jpeg_default_qtables() and q_scale_factor array in library.
-
-Added -nosmooth option to cjpeg, complementary to djpeg.
-New variable "do_fancy_downsampling" in library, complement to fancy
-upsampling.  Fancy upsampling now uses direct DCT scaling with sizes
-larger than 8.  The old method is not reversible and has been removed.
-
-Support arithmetic entropy encoding and decoding.
-Added files jaricom.c, jcarith.c, jdarith.c.
-
-Straighten the file structure:
-Removed files jidctred.c, jcphuff.c, jchuff.h, jdphuff.c, jdhuff.h.
-
-jpegtran has a new "lossless" cropping feature.
-
-Implement -perfect option in jpegtran, new API function
-jtransform_perfect_transform() in transupp. (DP 204_perfect.dpatch)
-
-Better error messages for jpegtran fopen failure.
-(DP 203_jpegtran_errmsg.dpatch)
-
-Fix byte order issue with 16bit PPM/PGM files in rdppm.c/wrppm.c:
-according to Netpbm, the de facto standard implementation of the PNM formats,
-the most significant byte is first. (DP 203_rdppm.dpatch)
-
-Add -raw option to rdjpgcom not to mangle the output.
-(DP 205_rdjpgcom_raw.dpatch)
-
-Make rdjpgcom locale aware. (DP 201_rdjpgcom_locale.dpatch)
-
-Add extern "C" to jpeglib.h.
-This avoids the need to put extern "C" { ... } around #include "jpeglib.h"
-in your C++ application.  Defining the symbol DONT_USE_EXTERN_C in the
-configuration prevents this. (DP 202_jpeglib.h_c++.dpatch)
-
-
-Version 6b  27-Mar-1998
------------------------
-
-jpegtran has new features for lossless image transformations (rotation
-and flipping) as well as "lossless" reduction to grayscale.
-
-jpegtran now copies comments by default; it has a -copy switch to enable
-copying all APPn blocks as well, or to suppress comments.  (Formerly it
-always suppressed comments and APPn blocks.)  jpegtran now also preserves
-JFIF version and resolution information.
-
-New decompressor library feature: COM and APPn markers found in the input
-file can be saved in memory for later use by the application.  (Before,
-you had to code this up yourself with a custom marker processor.)
-
-There is an unused field "void * client_data" now in compress and decompress
-parameter structs; this may be useful in some applications.
-
-JFIF version number information is now saved by the decoder and accepted by
-the encoder.  jpegtran uses this to copy the source file's version number,
-to ensure "jpegtran -copy all" won't create bogus files that contain JFXX
-extensions but claim to be version 1.01.  Applications that generate their
-own JFXX extension markers also (finally) have a supported way to cause the
-encoder to emit JFIF version number 1.02.
-
-djpeg's trace mode reports JFIF 1.02 thumbnail images as such, rather
-than as unknown APP0 markers.
-
-In -verbose mode, djpeg and rdjpgcom will try to print the contents of
-APP12 markers as text.  Some digital cameras store useful text information
-in APP12 markers.
-
-Handling of truncated data streams is more robust: blocks beyond the one in
-which the error occurs will be output as uniform gray, or left unchanged
-if decoding a progressive JPEG.  The appearance no longer depends on the
-Huffman tables being used.
-
-Huffman tables are checked for validity much more carefully than before.
-
-To avoid the Unisys LZW patent, djpeg's GIF output capability has been
-changed to produce "uncompressed GIFs", and cjpeg's GIF input capability
-has been removed altogether.  We're not happy about it either, but there
-seems to be no good alternative.
-
-The configure script now supports building libjpeg as a shared library
-on many flavors of Unix (all the ones that GNU libtool knows how to
-build shared libraries for).  Use "./configure --enable-shared" to
-try this out.
-
-New jconfig file and makefiles for Microsoft Visual C++ and Developer Studio.
-Also, a jconfig file and a build script for Metrowerks CodeWarrior
-on Apple Macintosh.  makefile.dj has been updated for DJGPP v2, and there
-are miscellaneous other minor improvements in the makefiles.
-
-jmemmac.c now knows how to create temporary files following Mac System 7
-conventions.
-
-djpeg's -map switch is now able to read raw-format PPM files reliably.
-
-cjpeg -progressive -restart no longer generates any unnecessary DRI markers.
-
-Multiple calls to jpeg_simple_progression for a single JPEG object
-no longer leak memory.
-
-
-Version 6a  7-Feb-96
---------------------
-
-Library initialization sequence modified to detect version mismatches
-and struct field packing mismatches between library and calling application.
-This change requires applications to be recompiled, but does not require
-any application source code change.
-
-All routine declarations changed to the style "GLOBAL(type) name ...",
-that is, GLOBAL, LOCAL, METHODDEF, EXTERN are now macros taking the
-routine's return type as an argument.  This makes it possible to add
-Microsoft-style linkage keywords to all the routines by changing just
-these macros.  Note that any application code that was using these macros
-will have to be changed.
-
-DCT coefficient quantization tables are now stored in normal array order
-rather than zigzag order.  Application code that calls jpeg_add_quant_table,
-or otherwise manipulates quantization tables directly, will need to be
-changed.  If you need to make such code work with either older or newer
-versions of the library, a test like "#if JPEG_LIB_VERSION >= 61" is
-recommended.
-
-djpeg's trace capability now dumps DQT tables in natural order, not zigzag
-order.  This allows the trace output to be made into a "-qtables" file
-more easily.
-
-New system-dependent memory manager module for use on Apple Macintosh.
-
-Fix bug in cjpeg's -smooth option: last one or two scanlines would be
-duplicates of the prior line unless the image height mod 16 was 1 or 2.
-
-Repair minor problems in VMS, BCC, MC6 makefiles.
-
-New configure script based on latest GNU Autoconf.
-
-Correct the list of include files needed by MetroWerks C for ccommand().
-
-Numerous small documentation updates.
-
-
-Version 6  2-Aug-95
--------------------
-
-Progressive JPEG support: library can read and write full progressive JPEG
-files.  A "buffered image" mode supports incremental decoding for on-the-fly
-display of progressive images.  Simply recompiling an existing IJG-v5-based
-decoder with v6 should allow it to read progressive files, though of course
-without any special progressive display.
-
-New "jpegtran" application performs lossless transcoding between different
-JPEG formats; primarily, it can be used to convert baseline to progressive
-JPEG and vice versa.  In support of jpegtran, the library now allows lossless
-reading and writing of JPEG files as DCT coefficient arrays.  This ability
-may be of use in other applications.
-
-Notes for programmers:
-* We changed jpeg_start_decompress() to be able to suspend; this makes all
-decoding modes available to suspending-input applications.  However,
-existing applications that use suspending input will need to be changed
-to check the return value from jpeg_start_decompress().  You don't need to
-do anything if you don't use a suspending data source.
-* We changed the interface to the virtual array routines: access_virt_array
-routines now take a count of the number of rows to access this time.  The
-last parameter to request_virt_array routines is now interpreted as the
-maximum number of rows that may be accessed at once, but not necessarily
-the height of every access.
-
-
-Version 5b  15-Mar-95
----------------------
-
-Correct bugs with grayscale images having v_samp_factor > 1.
-
-jpeg_write_raw_data() now supports output suspension.
-
-Correct bugs in "configure" script for case of compiling in
-a directory other than the one containing the source files.
-
-Repair bug in jquant1.c: sometimes didn't use as many colors as it could.
-
-Borland C makefile and jconfig file work under either MS-DOS or OS/2.
-
-Miscellaneous improvements to documentation.
-
-
-Version 5a  7-Dec-94
---------------------
-
-Changed color conversion roundoff behavior so that grayscale values are
-represented exactly.  (This causes test image files to change.)
-
-Make ordered dither use 16x16 instead of 4x4 pattern for a small quality
-improvement.
-
-New configure script based on latest GNU Autoconf.
-Fix configure script to handle CFLAGS correctly.
-Rename *.auto files to *.cfg, so that configure script still works if
-file names have been truncated for DOS.
-
-Fix bug in rdbmp.c: didn't allow for extra data between header and image.
-
-Modify rdppm.c/wrppm.c to handle 2-byte raw PPM/PGM formats for 12-bit data.
-
-Fix several bugs in rdrle.c.
-
-NEED_SHORT_EXTERNAL_NAMES option was broken.
-
-Revise jerror.h/jerror.c for more flexibility in message table.
-
-Repair oversight in jmemname.c NO_MKTEMP case: file could be there
-but unreadable.
-
-
-Version 5  24-Sep-94
---------------------
-
-Version 5 represents a nearly complete redesign and rewrite of the IJG
-software.  Major user-visible changes include:
-  * Automatic configuration simplifies installation for most Unix systems.
-  * A range of speed vs. image quality tradeoffs are supported.
-    This includes resizing of an image during decompression: scaling down
-    by a factor of 1/2, 1/4, or 1/8 is handled very efficiently.
-  * New programs rdjpgcom and wrjpgcom allow insertion and extraction
-    of text comments in a JPEG file.
-
-The application programmer's interface to the library has changed completely.
-Notable improvements include:
-  * We have eliminated the use of callback routines for handling the
-    uncompressed image data.  The application now sees the library as a
-    set of routines that it calls to read or write image data on a
-    scanline-by-scanline basis.
-  * The application image data is represented in a conventional interleaved-
-    pixel format, rather than as a separate array for each color channel.
-    This can save a copying step in many programs.
-  * The handling of compressed data has been cleaned up: the application can
-    supply routines to source or sink the compressed data.  It is possible to
-    suspend processing on source/sink buffer overrun, although this is not
-    supported in all operating modes.
-  * All static state has been eliminated from the library, so that multiple
-    instances of compression or decompression can be active concurrently.
-  * JPEG abbreviated datastream formats are supported, ie, quantization and
-    Huffman tables can be stored separately from the image data.
-  * And not only that, but the documentation of the library has improved
-    considerably!
-
-
-The last widely used release before the version 5 rewrite was version 4A of
-18-Feb-93.  Change logs before that point have been discarded, since they
-are not of much interest after the rewrite.
diff --git a/cmake/externals/libjpeg/jaricom.c b/cmake/externals/libjpeg/jaricom.c
deleted file mode 100644
index 690068861..000000000
--- a/cmake/externals/libjpeg/jaricom.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * jaricom.c
- *
- * Developed 1997-2011 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains probability estimation tables for common use in
- * arithmetic entropy encoding and decoding routines.
- *
- * This data represents Table D.3 in the JPEG spec (D.2 in the draft),
- * ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81, and Table 24
- * in the JBIG spec, ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-/* The following #define specifies the packing of the four components
- * into the compact INT32 representation.
- * Note that this formula must match the actual arithmetic encoder
- * and decoder implementation.  The implementation has to be changed
- * if this formula is changed.
- * The current organization is leaned on Markus Kuhn's JBIG
- * implementation (jbig_tab.c).
- */
-
-#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b)
-
-const INT32 jpeg_aritab[113+1] = {
-/*
- * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
- */
-  V(   0, 0x5a1d,   1,   1, 1 ),
-  V(   1, 0x2586,  14,   2, 0 ),
-  V(   2, 0x1114,  16,   3, 0 ),
-  V(   3, 0x080b,  18,   4, 0 ),
-  V(   4, 0x03d8,  20,   5, 0 ),
-  V(   5, 0x01da,  23,   6, 0 ),
-  V(   6, 0x00e5,  25,   7, 0 ),
-  V(   7, 0x006f,  28,   8, 0 ),
-  V(   8, 0x0036,  30,   9, 0 ),
-  V(   9, 0x001a,  33,  10, 0 ),
-  V(  10, 0x000d,  35,  11, 0 ),
-  V(  11, 0x0006,   9,  12, 0 ),
-  V(  12, 0x0003,  10,  13, 0 ),
-  V(  13, 0x0001,  12,  13, 0 ),
-  V(  14, 0x5a7f,  15,  15, 1 ),
-  V(  15, 0x3f25,  36,  16, 0 ),
-  V(  16, 0x2cf2,  38,  17, 0 ),
-  V(  17, 0x207c,  39,  18, 0 ),
-  V(  18, 0x17b9,  40,  19, 0 ),
-  V(  19, 0x1182,  42,  20, 0 ),
-  V(  20, 0x0cef,  43,  21, 0 ),
-  V(  21, 0x09a1,  45,  22, 0 ),
-  V(  22, 0x072f,  46,  23, 0 ),
-  V(  23, 0x055c,  48,  24, 0 ),
-  V(  24, 0x0406,  49,  25, 0 ),
-  V(  25, 0x0303,  51,  26, 0 ),
-  V(  26, 0x0240,  52,  27, 0 ),
-  V(  27, 0x01b1,  54,  28, 0 ),
-  V(  28, 0x0144,  56,  29, 0 ),
-  V(  29, 0x00f5,  57,  30, 0 ),
-  V(  30, 0x00b7,  59,  31, 0 ),
-  V(  31, 0x008a,  60,  32, 0 ),
-  V(  32, 0x0068,  62,  33, 0 ),
-  V(  33, 0x004e,  63,  34, 0 ),
-  V(  34, 0x003b,  32,  35, 0 ),
-  V(  35, 0x002c,  33,   9, 0 ),
-  V(  36, 0x5ae1,  37,  37, 1 ),
-  V(  37, 0x484c,  64,  38, 0 ),
-  V(  38, 0x3a0d,  65,  39, 0 ),
-  V(  39, 0x2ef1,  67,  40, 0 ),
-  V(  40, 0x261f,  68,  41, 0 ),
-  V(  41, 0x1f33,  69,  42, 0 ),
-  V(  42, 0x19a8,  70,  43, 0 ),
-  V(  43, 0x1518,  72,  44, 0 ),
-  V(  44, 0x1177,  73,  45, 0 ),
-  V(  45, 0x0e74,  74,  46, 0 ),
-  V(  46, 0x0bfb,  75,  47, 0 ),
-  V(  47, 0x09f8,  77,  48, 0 ),
-  V(  48, 0x0861,  78,  49, 0 ),
-  V(  49, 0x0706,  79,  50, 0 ),
-  V(  50, 0x05cd,  48,  51, 0 ),
-  V(  51, 0x04de,  50,  52, 0 ),
-  V(  52, 0x040f,  50,  53, 0 ),
-  V(  53, 0x0363,  51,  54, 0 ),
-  V(  54, 0x02d4,  52,  55, 0 ),
-  V(  55, 0x025c,  53,  56, 0 ),
-  V(  56, 0x01f8,  54,  57, 0 ),
-  V(  57, 0x01a4,  55,  58, 0 ),
-  V(  58, 0x0160,  56,  59, 0 ),
-  V(  59, 0x0125,  57,  60, 0 ),
-  V(  60, 0x00f6,  58,  61, 0 ),
-  V(  61, 0x00cb,  59,  62, 0 ),
-  V(  62, 0x00ab,  61,  63, 0 ),
-  V(  63, 0x008f,  61,  32, 0 ),
-  V(  64, 0x5b12,  65,  65, 1 ),
-  V(  65, 0x4d04,  80,  66, 0 ),
-  V(  66, 0x412c,  81,  67, 0 ),
-  V(  67, 0x37d8,  82,  68, 0 ),
-  V(  68, 0x2fe8,  83,  69, 0 ),
-  V(  69, 0x293c,  84,  70, 0 ),
-  V(  70, 0x2379,  86,  71, 0 ),
-  V(  71, 0x1edf,  87,  72, 0 ),
-  V(  72, 0x1aa9,  87,  73, 0 ),
-  V(  73, 0x174e,  72,  74, 0 ),
-  V(  74, 0x1424,  72,  75, 0 ),
-  V(  75, 0x119c,  74,  76, 0 ),
-  V(  76, 0x0f6b,  74,  77, 0 ),
-  V(  77, 0x0d51,  75,  78, 0 ),
-  V(  78, 0x0bb6,  77,  79, 0 ),
-  V(  79, 0x0a40,  77,  48, 0 ),
-  V(  80, 0x5832,  80,  81, 1 ),
-  V(  81, 0x4d1c,  88,  82, 0 ),
-  V(  82, 0x438e,  89,  83, 0 ),
-  V(  83, 0x3bdd,  90,  84, 0 ),
-  V(  84, 0x34ee,  91,  85, 0 ),
-  V(  85, 0x2eae,  92,  86, 0 ),
-  V(  86, 0x299a,  93,  87, 0 ),
-  V(  87, 0x2516,  86,  71, 0 ),
-  V(  88, 0x5570,  88,  89, 1 ),
-  V(  89, 0x4ca9,  95,  90, 0 ),
-  V(  90, 0x44d9,  96,  91, 0 ),
-  V(  91, 0x3e22,  97,  92, 0 ),
-  V(  92, 0x3824,  99,  93, 0 ),
-  V(  93, 0x32b4,  99,  94, 0 ),
-  V(  94, 0x2e17,  93,  86, 0 ),
-  V(  95, 0x56a8,  95,  96, 1 ),
-  V(  96, 0x4f46, 101,  97, 0 ),
-  V(  97, 0x47e5, 102,  98, 0 ),
-  V(  98, 0x41cf, 103,  99, 0 ),
-  V(  99, 0x3c3d, 104, 100, 0 ),
-  V( 100, 0x375e,  99,  93, 0 ),
-  V( 101, 0x5231, 105, 102, 0 ),
-  V( 102, 0x4c0f, 106, 103, 0 ),
-  V( 103, 0x4639, 107, 104, 0 ),
-  V( 104, 0x415e, 103,  99, 0 ),
-  V( 105, 0x5627, 105, 106, 1 ),
-  V( 106, 0x50e7, 108, 107, 0 ),
-  V( 107, 0x4b85, 109, 103, 0 ),
-  V( 108, 0x5597, 110, 109, 0 ),
-  V( 109, 0x504f, 111, 107, 0 ),
-  V( 110, 0x5a10, 110, 111, 1 ),
-  V( 111, 0x5522, 112, 109, 0 ),
-  V( 112, 0x59eb, 112, 111, 1 ),
-/*
- * This last entry is used for fixed probability estimate of 0.5
- * as suggested in Section 10.3 Table 5 of ITU-T Rec. T.851.
- */
-  V( 113, 0x5a1d, 113, 113, 0 )
-};
diff --git a/cmake/externals/libjpeg/jcapimin.c b/cmake/externals/libjpeg/jcapimin.c
deleted file mode 100644
index 639ce86f4..000000000
--- a/cmake/externals/libjpeg/jcapimin.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * jcapimin.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2003-2010 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the compression half
- * of the JPEG library.  These are the "minimum" API routines that may be
- * needed in either the normal full-compression case or the transcoding-only
- * case.
- *
- * Most of the routines intended to be called directly by an application
- * are in this file or in jcapistd.c.  But also see jcparam.c for
- * parameter-setup helper routines, jcomapi.c for routines shared by
- * compression and decompression, and jctrans.c for the transcoding case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Initialization of a JPEG compression object.
- * The error manager must already be set up (in case memory manager fails).
- */
-
-GLOBAL(void)
-jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
-{
-  int i;
-
-  /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
-  if (version != JPEG_LIB_VERSION)
-    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_compress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
-
-  /* For debugging purposes, we zero the whole master structure.
-   * But the application has already set the err pointer, and may have set
-   * client_data, so we have to save and restore those fields.
-   * Note: if application hasn't set client_data, tools like Purify may
-   * complain here.
-   */
-  {
-    struct jpeg_error_mgr * err = cinfo->err;
-    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
-    cinfo->err = err;
-    cinfo->client_data = client_data;
-  }
-  cinfo->is_decompressor = FALSE;
-
-  /* Initialize a memory manager instance for this object */
-  jinit_memory_mgr((j_common_ptr) cinfo);
-
-  /* Zero out pointers to permanent structures. */
-  cinfo->progress = NULL;
-  cinfo->dest = NULL;
-
-  cinfo->comp_info = NULL;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    cinfo->quant_tbl_ptrs[i] = NULL;
-    cinfo->q_scale_factor[i] = 100;
-  }
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    cinfo->dc_huff_tbl_ptrs[i] = NULL;
-    cinfo->ac_huff_tbl_ptrs[i] = NULL;
-  }
-
-  /* Must do it here for emit_dqt in case jpeg_write_tables is used */
-  cinfo->block_size = DCTSIZE;
-  cinfo->natural_order = jpeg_natural_order;
-  cinfo->lim_Se = DCTSIZE2-1;
-
-  cinfo->script_space = NULL;
-
-  cinfo->input_gamma = 1.0;	/* in case application forgets */
-
-  /* OK, I'm ready */
-  cinfo->global_state = CSTATE_START;
-}
-
-
-/*
- * Destruction of a JPEG compression object
- */
-
-GLOBAL(void)
-jpeg_destroy_compress (j_compress_ptr cinfo)
-{
-  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Abort processing of a JPEG compression operation,
- * but don't destroy the object itself.
- */
-
-GLOBAL(void)
-jpeg_abort_compress (j_compress_ptr cinfo)
-{
-  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Forcibly suppress or un-suppress all quantization and Huffman tables.
- * Marks all currently defined tables as already written (if suppress)
- * or not written (if !suppress).  This will control whether they get emitted
- * by a subsequent jpeg_start_compress call.
- *
- * This routine is exported for use by applications that want to produce
- * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
- * since it is called by jpeg_start_compress, we put it here --- otherwise
- * jcparam.o would be linked whether the application used it or not.
- */
-
-GLOBAL(void)
-jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
-{
-  int i;
-  JQUANT_TBL * qtbl;
-  JHUFF_TBL * htbl;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)
-      qtbl->sent_table = suppress;
-  }
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)
-      htbl->sent_table = suppress;
-    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
-      htbl->sent_table = suppress;
-  }
-}
-
-
-/*
- * Finish JPEG compression.
- *
- * If a multipass operating mode was selected, this may do a great deal of
- * work including most of the actual output.
- */
-
-GLOBAL(void)
-jpeg_finish_compress (j_compress_ptr cinfo)
-{
-  JDIMENSION iMCU_row;
-
-  if (cinfo->global_state == CSTATE_SCANNING ||
-      cinfo->global_state == CSTATE_RAW_OK) {
-    /* Terminate first pass */
-    if (cinfo->next_scanline < cinfo->image_height)
-      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
-    (*cinfo->master->finish_pass) (cinfo);
-  } else if (cinfo->global_state != CSTATE_WRCOEFS)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Perform any remaining passes */
-  while (! cinfo->master->is_last_pass) {
-    (*cinfo->master->prepare_for_pass) (cinfo);
-    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
-      if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) iMCU_row;
-	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      }
-      /* We bypass the main controller and invoke coef controller directly;
-       * all work is being done from the coefficient buffer.
-       */
-      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
-	ERREXIT(cinfo, JERR_CANT_SUSPEND);
-    }
-    (*cinfo->master->finish_pass) (cinfo);
-  }
-  /* Write EOI, do final cleanup */
-  (*cinfo->marker->write_file_trailer) (cinfo);
-  (*cinfo->dest->term_destination) (cinfo);
-  /* We can use jpeg_abort to release memory and reset global_state */
-  jpeg_abort((j_common_ptr) cinfo);
-}
-
-
-/*
- * Write a special marker.
- * This is only recommended for writing COM or APPn markers.
- * Must be called after jpeg_start_compress() and before
- * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
- */
-
-GLOBAL(void)
-jpeg_write_marker (j_compress_ptr cinfo, int marker,
-		   const JOCTET *dataptr, unsigned int datalen)
-{
-  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
-
-  if (cinfo->next_scanline != 0 ||
-      (cinfo->global_state != CSTATE_SCANNING &&
-       cinfo->global_state != CSTATE_RAW_OK &&
-       cinfo->global_state != CSTATE_WRCOEFS))
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
-  while (datalen--) {
-    (*write_marker_byte) (cinfo, *dataptr);
-    dataptr++;
-  }
-}
-
-/* Same, but piecemeal. */
-
-GLOBAL(void)
-jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
-{
-  if (cinfo->next_scanline != 0 ||
-      (cinfo->global_state != CSTATE_SCANNING &&
-       cinfo->global_state != CSTATE_RAW_OK &&
-       cinfo->global_state != CSTATE_WRCOEFS))
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-}
-
-GLOBAL(void)
-jpeg_write_m_byte (j_compress_ptr cinfo, int val)
-{
-  (*cinfo->marker->write_marker_byte) (cinfo, val);
-}
-
-
-/*
- * Alternate compression function: just write an abbreviated table file.
- * Before calling this, all parameters and a data destination must be set up.
- *
- * To produce a pair of files containing abbreviated tables and abbreviated
- * image data, one would proceed as follows:
- *
- *		initialize JPEG object
- *		set JPEG parameters
- *		set destination to table file
- *		jpeg_write_tables(cinfo);
- *		set destination to image file
- *		jpeg_start_compress(cinfo, FALSE);
- *		write data...
- *		jpeg_finish_compress(cinfo);
- *
- * jpeg_write_tables has the side effect of marking all tables written
- * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
- * will not re-emit the tables unless it is passed write_all_tables=TRUE.
- */
-
-GLOBAL(void)
-jpeg_write_tables (j_compress_ptr cinfo)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Initialize the marker writer ... bit of a crock to do it here. */
-  jinit_marker_writer(cinfo);
-  /* Write them tables! */
-  (*cinfo->marker->write_tables_only) (cinfo);
-  /* And clean up. */
-  (*cinfo->dest->term_destination) (cinfo);
-  /*
-   * In library releases up through v6a, we called jpeg_abort() here to free
-   * any working memory allocated by the destination manager and marker
-   * writer.  Some applications had a problem with that: they allocated space
-   * of their own from the library memory manager, and didn't want it to go
-   * away during write_tables.  So now we do nothing.  This will cause a
-   * memory leak if an app calls write_tables repeatedly without doing a full
-   * compression cycle or otherwise resetting the JPEG object.  However, that
-   * seems less bad than unexpectedly freeing memory in the normal case.
-   * An app that prefers the old behavior can call jpeg_abort for itself after
-   * each call to jpeg_write_tables().
-   */
-}
diff --git a/cmake/externals/libjpeg/jcapistd.c b/cmake/externals/libjpeg/jcapistd.c
deleted file mode 100644
index 0917afa97..000000000
--- a/cmake/externals/libjpeg/jcapistd.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * jcapistd.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2013 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the compression half
- * of the JPEG library.  These are the "standard" API routines that are
- * used in the normal full-compression case.  They are not used by a
- * transcoding-only application.  Note that if an application links in
- * jpeg_start_compress, it will end up linking in the entire compressor.
- * We thus must separate this file from jcapimin.c to avoid linking the
- * whole compression library into a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Compression initialization.
- * Before calling this, all parameters and a data destination must be set up.
- *
- * We require a write_all_tables parameter as a failsafe check when writing
- * multiple datastreams from the same compression object.  Since prior runs
- * will have left all the tables marked sent_table=TRUE, a subsequent run
- * would emit an abbreviated stream (no tables) by default.  This may be what
- * is wanted, but for safety's sake it should not be the default behavior:
- * programmers should have to make a deliberate choice to emit abbreviated
- * images.  Therefore the documentation and examples should encourage people
- * to pass write_all_tables=TRUE; then it will take active thought to do the
- * wrong thing.
- */
-
-GLOBAL(void)
-jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (write_all_tables)
-    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
-
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Perform master selection of active modules */
-  jinit_compress_master(cinfo);
-  /* Set up for the first pass */
-  (*cinfo->master->prepare_for_pass) (cinfo);
-  /* Ready for application to drive first pass through jpeg_write_scanlines
-   * or jpeg_write_raw_data.
-   */
-  cinfo->next_scanline = 0;
-  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
-}
-
-
-/*
- * Write some scanlines of data to the JPEG compressor.
- *
- * The return value will be the number of lines actually written.
- * This should be less than the supplied num_lines only in case that
- * the data destination module has requested suspension of the compressor,
- * or if more than image_height scanlines are passed in.
- *
- * Note: we warn about excess calls to jpeg_write_scanlines() since
- * this likely signals an application programmer error.  However,
- * excess scanlines passed in the last valid call are *silently* ignored,
- * so that the application need not adjust num_lines for end-of-image
- * when using a multiple-scanline buffer.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
-		      JDIMENSION num_lines)
-{
-  JDIMENSION row_ctr, rows_left;
-
-  if (cinfo->global_state != CSTATE_SCANNING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->next_scanline >= cinfo->image_height)
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->image_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Give master control module another chance if this is first call to
-   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
-   * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_scanlines.
-   */
-  if (cinfo->master->call_pass_startup)
-    (*cinfo->master->pass_startup) (cinfo);
-
-  /* Ignore any extra scanlines at bottom of image. */
-  rows_left = cinfo->image_height - cinfo->next_scanline;
-  if (num_lines > rows_left)
-    num_lines = rows_left;
-
-  row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
-  cinfo->next_scanline += row_ctr;
-  return row_ctr;
-}
-
-
-/*
- * Alternate entry point to write raw data.
- * Processes exactly one iMCU row per call, unless suspended.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
-		     JDIMENSION num_lines)
-{
-  JDIMENSION lines_per_iMCU_row;
-
-  if (cinfo->global_state != CSTATE_RAW_OK)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->next_scanline >= cinfo->image_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->image_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Give master control module another chance if this is first call to
-   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
-   * delayed so that application can write COM, etc, markers between
-   * jpeg_start_compress and jpeg_write_raw_data.
-   */
-  if (cinfo->master->call_pass_startup)
-    (*cinfo->master->pass_startup) (cinfo);
-
-  /* Verify that at least one iMCU row has been passed. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
-  if (num_lines < lines_per_iMCU_row)
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* Directly compress the row. */
-  if (! (*cinfo->coef->compress_data) (cinfo, data)) {
-    /* If compressor did not consume the whole row, suspend processing. */
-    return 0;
-  }
-
-  /* OK, we processed one iMCU row. */
-  cinfo->next_scanline += lines_per_iMCU_row;
-  return lines_per_iMCU_row;
-}
diff --git a/cmake/externals/libjpeg/jcarith.c b/cmake/externals/libjpeg/jcarith.c
deleted file mode 100644
index 1b45089a3..000000000
--- a/cmake/externals/libjpeg/jcarith.c
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- * jcarith.c
- *
- * Developed 1997-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains portable arithmetic entropy encoding routines for JPEG
- * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
- *
- * Both sequential and progressive modes are supported in this single module.
- *
- * Suspension is not currently supported in this module.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Expanded entropy encoder object for arithmetic encoding. */
-
-typedef struct {
-  struct jpeg_entropy_encoder pub; /* public fields */
-
-  INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
-  INT32 a;               /* A register, normalized size of coding interval */
-  INT32 sc;        /* counter for stacked 0xFF values which might overflow */
-  INT32 zc;          /* counter for pending 0x00 output values which might *
-                          * be discarded at the end ("Pacman" termination) */
-  int ct;  /* bit shift counter, determines when next byte will be written */
-  int buffer;                /* buffer for most recent output byte != 0xFF */
-
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
-
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
-
-  /* Pointers to statistics areas (these workspaces have image lifespan) */
-  unsigned char * dc_stats[NUM_ARITH_TBLS];
-  unsigned char * ac_stats[NUM_ARITH_TBLS];
-
-  /* Statistics bin for coding with fixed probability 0.5 */
-  unsigned char fixed_bin[4];
-} arith_entropy_encoder;
-
-typedef arith_entropy_encoder * arith_entropy_ptr;
-
-/* The following two definitions specify the allocation chunk size
- * for the statistics area.
- * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
- * 49 statistics bins for DC, and 245 statistics bins for AC coding.
- *
- * We use a compact representation with 1 byte per statistics bin,
- * thus the numbers directly represent byte sizes.
- * This 1 byte per statistics bin contains the meaning of the MPS
- * (more probable symbol) in the highest bit (mask 0x80), and the
- * index into the probability estimation state machine table
- * in the lower bits (mask 0x7F).
- */
-
-#define DC_STAT_BINS 64
-#define AC_STAT_BINS 256
-
-/* NOTE: Uncomment the following #define if you want to use the
- * given formula for calculating the AC conditioning parameter Kx
- * for spectral selection progressive coding in section G.1.3.2
- * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
- * Although the spec and P&M authors claim that this "has proven
- * to give good results for 8 bit precision samples", I'm not
- * convinced yet that this is really beneficial.
- * Early tests gave only very marginal compression enhancements
- * (a few - around 5 or so - bytes even for very large files),
- * which would turn out rather negative if we'd suppress the
- * DAC (Define Arithmetic Conditioning) marker segments for
- * the default parameters in the future.
- * Note that currently the marker writing module emits 12-byte
- * DAC segments for a full-component scan in a color image.
- * This is not worth worrying about IMHO. However, since the
- * spec defines the default values to be used if the tables
- * are omitted (unlike Huffman tables, which are required
- * anyway), one might optimize this behaviour in the future,
- * and then it would be disadvantageous to use custom tables if
- * they don't provide sufficient gain to exceed the DAC size.
- *
- * On the other hand, I'd consider it as a reasonable result
- * that the conditioning has no significant influence on the
- * compression performance. This means that the basic
- * statistical model is already rather stable.
- *
- * Thus, at the moment, we use the default conditioning values
- * anyway, and do not use the custom formula.
- *
-#define CALCULATE_SPECTRAL_CONDITIONING
- */
-
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
- * which should be safe.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
-#define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-
-LOCAL(void)
-emit_byte (int val, j_compress_ptr cinfo)
-/* Write next output byte; we do not support suspension in this module. */
-{
-  struct jpeg_destination_mgr * dest = cinfo->dest;
-
-  *dest->next_output_byte++ = (JOCTET) val;
-  if (--dest->free_in_buffer == 0)
-    if (! (*dest->empty_output_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-}
-
-
-/*
- * Finish up at the end of an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass (j_compress_ptr cinfo)
-{
-  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  INT32 temp;
-
-  /* Section D.1.8: Termination of encoding */
-
-  /* Find the e->c in the coding interval with the largest
-   * number of trailing zero bits */
-  if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
-    e->c = temp + 0x8000L;
-  else
-    e->c = temp;
-  /* Send remaining bytes to output */
-  e->c <<= e->ct;
-  if (e->c & 0xF8000000L) {
-    /* One final overflow has to be handled */
-    if (e->buffer >= 0) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      emit_byte(e->buffer + 1, cinfo);
-      if (e->buffer + 1 == 0xFF)
-	emit_byte(0x00, cinfo);
-    }
-    e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
-    e->sc = 0;
-  } else {
-    if (e->buffer == 0)
-      ++e->zc;
-    else if (e->buffer >= 0) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      emit_byte(e->buffer, cinfo);
-    }
-    if (e->sc) {
-      if (e->zc)
-	do emit_byte(0x00, cinfo);
-	while (--e->zc);
-      do {
-	emit_byte(0xFF, cinfo);
-	emit_byte(0x00, cinfo);
-      } while (--e->sc);
-    }
-  }
-  /* Output final bytes only if they are not 0x00 */
-  if (e->c & 0x7FFF800L) {
-    if (e->zc)  /* output final pending zero bytes */
-      do emit_byte(0x00, cinfo);
-      while (--e->zc);
-    emit_byte((int) ((e->c >> 19) & 0xFF), cinfo);
-    if (((e->c >> 19) & 0xFF) == 0xFF)
-      emit_byte(0x00, cinfo);
-    if (e->c & 0x7F800L) {
-      emit_byte((int) ((e->c >> 11) & 0xFF), cinfo);
-      if (((e->c >> 11) & 0xFF) == 0xFF)
-	emit_byte(0x00, cinfo);
-    }
-  }
-}
-
-
-/*
- * The core arithmetic encoding routine (common in JPEG and JBIG).
- * This needs to go as fast as possible.
- * Machine-dependent optimization facilities
- * are not utilized in this portable implementation.
- * However, this code should be fairly efficient and
- * may be a good base for further optimizations anyway.
- *
- * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
- *
- * Note: I've added full "Pacman" termination support to the
- * byte output routines, which is equivalent to the optional
- * Discard_final_zeros procedure (Figure D.15) in the spec.
- * Thus, we always produce the shortest possible output
- * stream compliant to the spec (no trailing zero bytes,
- * except for FF stuffing).
- *
- * I've also introduced a new scheme for accessing
- * the probability estimation state machine table,
- * derived from Markus Kuhn's JBIG implementation.
- */
-
-LOCAL(void)
-arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) 
-{
-  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  register unsigned char nl, nm;
-  register INT32 qe, temp;
-  register int sv;
-
-  /* Fetch values from our compact representation of Table D.3(D.2):
-   * Qe values and probability estimation state machine
-   */
-  sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
-
-  /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
-  e->a -= qe;
-  if (val != (sv >> 7)) {
-    /* Encode the less probable symbol */
-    if (e->a >= qe) {
-      /* If the interval size (qe) for the less probable symbol (LPS)
-       * is larger than the interval size for the MPS, then exchange
-       * the two symbols for coding efficiency, otherwise code the LPS
-       * as usual: */
-      e->c += e->a;
-      e->a = qe;
-    }
-    *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-  } else {
-    /* Encode the more probable symbol */
-    if (e->a >= 0x8000L)
-      return;  /* A >= 0x8000 -> ready, no renormalization required */
-    if (e->a < qe) {
-      /* If the interval size (qe) for the less probable symbol (LPS)
-       * is larger than the interval size for the MPS, then exchange
-       * the two symbols for coding efficiency: */
-      e->c += e->a;
-      e->a = qe;
-    }
-    *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-  }
-
-  /* Renormalization & data output per section D.1.6 */
-  do {
-    e->a <<= 1;
-    e->c <<= 1;
-    if (--e->ct == 0) {
-      /* Another byte is ready for output */
-      temp = e->c >> 19;
-      if (temp > 0xFF) {
-	/* Handle overflow over all stacked 0xFF bytes */
-	if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer + 1, cinfo);
-	  if (e->buffer + 1 == 0xFF)
-	    emit_byte(0x00, cinfo);
-	}
-	e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
-	e->sc = 0;
-	/* Note: The 3 spacer bits in the C register guarantee
-	 * that the new buffer byte can't be 0xFF here
-	 * (see page 160 in the P&M JPEG book). */
-	/* New output byte, might overflow later */
-	e->buffer = (int) (temp & 0xFF);
-      } else if (temp == 0xFF) {
-	++e->sc;  /* stack 0xFF byte (which might overflow later) */
-      } else {
-	/* Output all stacked 0xFF bytes, they will not overflow any more */
-	if (e->buffer == 0)
-	  ++e->zc;
-	else if (e->buffer >= 0) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  emit_byte(e->buffer, cinfo);
-	}
-	if (e->sc) {
-	  if (e->zc)
-	    do emit_byte(0x00, cinfo);
-	    while (--e->zc);
-	  do {
-	    emit_byte(0xFF, cinfo);
-	    emit_byte(0x00, cinfo);
-	  } while (--e->sc);
-	}
-	/* New output byte (can still overflow) */
-	e->buffer = (int) (temp & 0xFF);
-      }
-      e->c &= 0x7FFFFL;
-      e->ct += 8;
-    }
-  } while (e->a < 0x8000L);
-}
-
-
-/*
- * Emit a restart marker & resynchronize predictions.
- */
-
-LOCAL(void)
-emit_restart (j_compress_ptr cinfo, int restart_num)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci;
-  jpeg_component_info * compptr;
-
-  finish_pass(cinfo);
-
-  emit_byte(0xFF, cinfo);
-  emit_byte(JPEG_RST0 + restart_num, cinfo);
-
-  /* Re-initialize statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
-      /* Reset DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
-    }
-  }
-
-  /* Reset arithmetic encoding variables */
-  entropy->c = 0;
-  entropy->a = 0x10000L;
-  entropy->sc = 0;
-  entropy->zc = 0;
-  entropy->ct = 11;
-  entropy->buffer = -1;  /* empty */
-}
-
-
-/*
- * MCU encoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  int blkn, ci, tbl;
-  int v, v2, m;
-  ISHIFT_TEMPS
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Compute the DC value after the required point transform by Al.
-     * This is simply an arithmetic right shift.
-     */
-    m = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
-
-    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.4: Encode_DC_DIFF */
-    if ((v = m - entropy->last_dc_val[ci]) == 0) {
-      arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
-    } else {
-      entropy->last_dc_val[ci] = m;
-      arith_encode(cinfo, st, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
-      } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
-      }
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke;
-  int v, v2, m;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
-
-  /* Establish EOB (end-of-block) index */
-  ke = cinfo->Se;
-  do {
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if ((v = (*block)[natural_order[ke]]) >= 0) {
-      if (v >>= cinfo->Al) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Al) break;
-    }
-  } while (--ke);
-
-  /* Figure F.5: Encode_AC_Coefficients */
-  for (k = cinfo->Ss - 1; k < ke;) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    arith_encode(cinfo, st, 0);		/* EOB decision */
-    for (;;) {
-      if ((v = (*block)[natural_order[++k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 0);
-	  break;
-	}
-      } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  arith_encode(cinfo, st + 1, 1);
-	  arith_encode(cinfo, entropy->fixed_bin, 1);
-	  break;
-	}
-      }
-      arith_encode(cinfo, st + 1, 0);
-      st += 3;
-    }
-    st += 2;
-    /* Figure F.8: Encoding the magnitude category of v */
-    m = 0;
-    if (v -= 1) {
-      arith_encode(cinfo, st, 1);
-      m = 1;
-      v2 = v;
-      if (v2 >>= 1) {
-	arith_encode(cinfo, st, 1);
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-    }
-    arith_encode(cinfo, st, 0);
-    /* Figure F.9: Encoding the magnitude bit pattern of v */
-    st += 14;
-    while (m >>= 1)
-      arith_encode(cinfo, st, (m & v) ? 1 : 0);
-  }
-  /* Encode EOB decision only if k < cinfo->Se */
-  if (k < cinfo->Se) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    arith_encode(cinfo, st, 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  int Al, blkn;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
-  Al = cinfo->Al;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* We simply emit the Al'th bit of the DC coefficient value. */
-    arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke, kex;
-  int v;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Section G.1.3.3: Encoding of AC coefficients */
-
-  /* Establish EOB (end-of-block) index */
-  ke = cinfo->Se;
-  do {
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if ((v = (*block)[natural_order[ke]]) >= 0) {
-      if (v >>= cinfo->Al) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Al) break;
-    }
-  } while (--ke);
-
-  /* Establish EOBx (previous stage end-of-block) index */
-  for (kex = ke; kex > 0; kex--)
-    if ((v = (*block)[natural_order[kex]]) >= 0) {
-      if (v >>= cinfo->Ah) break;
-    } else {
-      v = -v;
-      if (v >>= cinfo->Ah) break;
-    }
-
-  /* Figure G.10: Encode_AC_Coefficients_SA */
-  for (k = cinfo->Ss - 1; k < ke;) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    if (k >= kex)
-      arith_encode(cinfo, st, 0);	/* EOB decision */
-    for (;;) {
-      if ((v = (*block)[natural_order[++k]]) >= 0) {
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 0);
-	  }
-	  break;
-	}
-      } else {
-	v = -v;
-	if (v >>= cinfo->Al) {
-	  if (v >> 1)			/* previously nonzero coef */
-	    arith_encode(cinfo, st + 2, (v & 1));
-	  else {			/* newly nonzero coef */
-	    arith_encode(cinfo, st + 1, 1);
-	    arith_encode(cinfo, entropy->fixed_bin, 1);
-	  }
-	  break;
-	}
-      }
-      arith_encode(cinfo, st + 1, 0);
-      st += 3;
-    }
-  }
-  /* Encode EOB decision only if k < cinfo->Se */
-  if (k < cinfo->Se) {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    arith_encode(cinfo, st, 1);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Encode and output one MCU's worth of arithmetic-compressed coefficients.
- */
-
-METHODDEF(boolean)
-encode_mcu (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, k, ke;
-  int v, v2, m;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      emit_restart(cinfo, entropy->next_restart_num);
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-
-    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
-
-    tbl = compptr->dc_tbl_no;
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.4: Encode_DC_DIFF */
-    if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
-      arith_encode(cinfo, st, 0);
-      entropy->dc_context[ci] = 0;	/* zero diff category */
-    } else {
-      entropy->last_dc_val[ci] = (*block)[0];
-      arith_encode(cinfo, st, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, st + 1, 0);	/* Table F.4: SS = S0 + 1 */
-	st += 2;			/* Table F.4: SP = S0 + 2 */
-	entropy->dc_context[ci] = 4;	/* small positive diff category */
-      } else {
-	v = -v;
-	arith_encode(cinfo, st + 1, 1);	/* Table F.4: SS = S0 + 1 */
-	st += 3;			/* Table F.4: SN = S0 + 3 */
-	entropy->dc_context[ci] = 8;	/* small negative diff category */
-      }
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
-	while (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st += 1;
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;	/* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] += 8;	/* large diff category */
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-
-    /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
-
-    if ((ke = cinfo->lim_Se) == 0) continue;
-    tbl = compptr->ac_tbl_no;
-
-    /* Establish EOB (end-of-block) index */
-    do {
-      if ((*block)[natural_order[ke]]) break;
-    } while (--ke);
-
-    /* Figure F.5: Encode_AC_Coefficients */
-    for (k = 0; k < ke;) {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      arith_encode(cinfo, st, 0);	/* EOB decision */
-      while ((v = (*block)[natural_order[++k]]) == 0) {
-	arith_encode(cinfo, st + 1, 0);
-	st += 3;
-      }
-      arith_encode(cinfo, st + 1, 1);
-      /* Figure F.6: Encoding nonzero value v */
-      /* Figure F.7: Encoding the sign of v */
-      if (v > 0) {
-	arith_encode(cinfo, entropy->fixed_bin, 0);
-      } else {
-	v = -v;
-	arith_encode(cinfo, entropy->fixed_bin, 1);
-      }
-      st += 2;
-      /* Figure F.8: Encoding the magnitude category of v */
-      m = 0;
-      if (v -= 1) {
-	arith_encode(cinfo, st, 1);
-	m = 1;
-	v2 = v;
-	if (v2 >>= 1) {
-	  arith_encode(cinfo, st, 1);
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (v2 >>= 1) {
-	    arith_encode(cinfo, st, 1);
-	    m <<= 1;
-	    st += 1;
-	  }
-	}
-      }
-      arith_encode(cinfo, st, 0);
-      /* Figure F.9: Encoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	arith_encode(cinfo, st, (m & v) ? 1 : 0);
-    }
-    /* Encode EOB decision only if k < cinfo->lim_Se */
-    if (k < cinfo->lim_Se) {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      arith_encode(cinfo, st, 1);
-    }
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-start_pass (j_compress_ptr cinfo, boolean gather_statistics)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (gather_statistics)
-    /* Make sure to avoid that in the master control logic!
-     * We are fully adaptive here and need no extra
-     * statistics gathering pass!
-     */
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-
-  /* We assume jcmaster.c already validated the progressive scan parameters. */
-
-  /* Select execution routines */
-  if (cinfo->progressive_mode) {
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_first;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_refine;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_refine;
-    }
-  } else
-    entropy->pub.encode_mcu = encode_mcu;
-
-  /* Allocate & initialize requested statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
-      /* Initialize DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
-#ifdef CALCULATE_SPECTRAL_CONDITIONING
-      if (cinfo->progressive_mode)
-	/* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
-	cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
-#endif
-    }
-  }
-
-  /* Initialize arithmetic encoding variables */
-  entropy->c = 0;
-  entropy->a = 0x10000L;
-  entropy->sc = 0;
-  entropy->zc = 0;
-  entropy->ct = 11;
-  entropy->buffer = -1;  /* empty */
-
-  /* Initialize restart stuff */
-  entropy->restarts_to_go = cinfo->restart_interval;
-  entropy->next_restart_num = 0;
-}
-
-
-/*
- * Module initialization routine for arithmetic entropy encoding.
- */
-
-GLOBAL(void)
-jinit_arith_encoder (j_compress_ptr cinfo)
-{
-  arith_entropy_ptr entropy;
-  int i;
-
-  entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(arith_entropy_encoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass;
-  entropy->pub.finish_pass = finish_pass;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    entropy->dc_stats[i] = NULL;
-    entropy->ac_stats[i] = NULL;
-  }
-
-  /* Initialize index for fixed probability estimation */
-  entropy->fixed_bin[0] = 113;
-}
diff --git a/cmake/externals/libjpeg/jccoefct.c b/cmake/externals/libjpeg/jccoefct.c
deleted file mode 100644
index 494aa2298..000000000
--- a/cmake/externals/libjpeg/jccoefct.c
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * jccoefct.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2003-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the coefficient buffer controller for compression.
- * This controller is the top level of the JPEG compressor proper.
- * The coefficient buffer lies between forward-DCT and entropy encoding steps.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* We use a full-image coefficient buffer when doing Huffman optimization,
- * and also for writing multiple-scan JPEG files.  In all cases, the DCT
- * step is run during the first pass, and subsequent passes need only read
- * the buffered coefficients.
- */
-#ifdef ENTROPY_OPT_SUPPORTED
-#define FULL_COEF_BUFFER_SUPPORTED
-#else
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-#define FULL_COEF_BUFFER_SUPPORTED
-#endif
-#endif
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_coef_controller pub; /* public fields */
-
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* For single-pass compression, it's sufficient to buffer just one MCU
-   * (although this may prove a bit slow in practice).
-   * We append a workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks,
-   * and reuse it for each MCU constructed and sent.
-   * In multi-pass modes, this array points to the current MCU's blocks
-   * within the virtual arrays.
-   */
-  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
-
-  /* In multi-pass modes, we need a virtual block array for each component. */
-  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
-
-  /* Workspace for single-pass compression (omitted otherwise). */
-  JBLOCK blk_buffer[C_MAX_BLOCKS_IN_MCU];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-/* Forward declarations */
-METHODDEF(boolean) compress_data
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-METHODDEF(boolean) compress_first_pass
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-METHODDEF(boolean) compress_output
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
-#endif
-
-
-LOCAL(void)
-start_iMCU_row (j_compress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->MCU_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  coef->iMCU_row_num = 0;
-  start_iMCU_row(cinfo);
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (coef->whole_image[0] != NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_data;
-    break;
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-  case JBUF_SAVE_AND_PASS:
-    if (coef->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_first_pass;
-    break;
-  case JBUF_CRANK_DEST:
-    if (coef->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    coef->pub.compress_data = compress_output;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-  }
-}
-
-
-/*
- * Process some data in the single-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the image.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf contains a plane for each component in image,
- * which we index according to the component's SOF position.
- */
-
-METHODDEF(boolean)
-compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int ci, xindex, yindex, yoffset, blockcnt;
-  JBLOCKROW blkp;
-  JSAMPARRAY input_ptr;
-  JDIMENSION xpos;
-  jpeg_component_info *compptr;
-  forward_DCT_ptr forward_DCT;
-
-  /* Loop to write as much as one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      /* Determine where data comes from in input_buf and do the DCT thing.
-       * Each call on forward_DCT processes a horizontal row of DCT blocks as
-       * wide as an MCU.  Dummy blocks at the right or bottom edge are filled in
-       * specially.  The data in them does not matter for image reconstruction,
-       * so we fill them with values that will encode to the smallest amount of
-       * data, viz: all zeroes in the AC entries, DC entries equal to previous
-       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
-       */
-      blkp = coef->blk_buffer;	/* pointer to current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	forward_DCT = cinfo->fdct->forward_DCT[compptr->component_index];
-	input_ptr = input_buf[compptr->component_index] +
-	  yoffset * compptr->DCT_v_scaled_size;
-	/* ypos == (yoffset + yindex) * compptr->DCT_v_scaled_size */
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	xpos = MCU_col_num * compptr->MCU_sample_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yoffset + yindex < compptr->last_row_height) {
-	    (*forward_DCT) (cinfo, compptr, input_ptr, blkp,
-			    xpos, (JDIMENSION) blockcnt);
-	    input_ptr += compptr->DCT_v_scaled_size;
-	    blkp += blockcnt;
-	    /* Dummy blocks at right edge */
-	    if ((xindex = compptr->MCU_width - blockcnt) == 0)
-	      continue;
-	  } else {
-	    /* At bottom of image, need a whole row of dummy blocks */
-	    xindex = compptr->MCU_width;
-	  }
-	  /* Fill in any dummy blocks needed in this row */
-	  MEMZERO(blkp, xindex * SIZEOF(JBLOCK));
-	  do {
-	    blkp[0][0] = blkp[-1][0];
-	    blkp++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to write the MCU.  In event of a suspension failure, we will
-       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
-       */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-
-/*
- * Process some data in the first pass of a multi-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the image.
- * This amount of data is read from the source buffer, DCT'd and quantized,
- * and saved into the virtual arrays.  We also generate suitable dummy blocks
- * as needed at the right and lower edges.  (The dummy blocks are constructed
- * in the virtual arrays, which have been padded appropriately.)  This makes
- * it possible for subsequent passes not to worry about real vs. dummy blocks.
- *
- * We must also emit the data to the entropy encoder.  This is conveniently
- * done by calling compress_output() after we've loaded the current strip
- * of the virtual arrays.
- *
- * NB: input_buf contains a plane for each component in image.  All
- * components are DCT'd and loaded into the virtual arrays in this pass.
- * However, it may be that only a subset of the components are emitted to
- * the entropy encoder during this first pass; be careful about looking
- * at the scan-dependent variables (MCU dimensions, etc).
- */
-
-METHODDEF(boolean)
-compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION blocks_across, MCUs_across, MCUindex;
-  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
-  JCOEF lastDC;
-  jpeg_component_info *compptr;
-  JBLOCKARRAY buffer;
-  JBLOCKROW thisblockrow, lastblockrow;
-  JSAMPARRAY input_ptr;
-  forward_DCT_ptr forward_DCT;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Align the virtual buffer for this component. */
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[ci],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (coef->iMCU_row_num < last_iMCU_row)
-      block_rows = compptr->v_samp_factor;
-    else {
-      /* NB: can't use last_row_height here, since may not be set! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-    }
-    blocks_across = compptr->width_in_blocks;
-    h_samp_factor = compptr->h_samp_factor;
-    /* Count number of dummy blocks to be added at the right margin. */
-    ndummy = (int) (blocks_across % h_samp_factor);
-    if (ndummy > 0)
-      ndummy = h_samp_factor - ndummy;
-    forward_DCT = cinfo->fdct->forward_DCT[ci];
-    input_ptr = input_buf[ci];
-    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
-     * on forward_DCT processes a complete horizontal row of DCT blocks.
-     */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      thisblockrow = buffer[block_row];
-      (*forward_DCT) (cinfo, compptr, input_ptr, thisblockrow,
-		      (JDIMENSION) 0, blocks_across);
-      input_ptr += compptr->DCT_v_scaled_size;
-      if (ndummy > 0) {
-	/* Create dummy blocks at the right edge of the image. */
-	thisblockrow += blocks_across; /* => first dummy block */
-	FMEMZERO((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
-	lastDC = thisblockrow[-1][0];
-	for (bi = 0; bi < ndummy; bi++) {
-	  thisblockrow[bi][0] = lastDC;
-	}
-      }
-    }
-    /* If at end of image, create dummy block rows as needed.
-     * The tricky part here is that within each MCU, we want the DC values
-     * of the dummy blocks to match the last real block's DC value.
-     * This squeezes a few more bytes out of the resulting file...
-     */
-    if (block_row < compptr->v_samp_factor) {
-      blocks_across += ndummy;	/* include lower right corner */
-      MCUs_across = blocks_across / h_samp_factor;
-      do {
-	thisblockrow = buffer[block_row];
-	lastblockrow = buffer[block_row-1];
-	FMEMZERO((void FAR *) thisblockrow,
-		 (size_t) blocks_across * SIZEOF(JBLOCK));
-	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
-	  lastDC = lastblockrow[h_samp_factor-1][0];
-	  for (bi = 0; bi < h_samp_factor; bi++) {
-	    thisblockrow[bi][0] = lastDC;
-	  }
-	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
-	  lastblockrow += h_samp_factor;
-	}
-      } while (++block_row < compptr->v_samp_factor);
-    }
-  }
-  /* NB: compress_output will increment iMCU_row_num if successful.
-   * A suspension return will result in redoing all the work above next time.
-   */
-
-  /* Emit data to the entropy encoder, sharing code with subsequent passes */
-  return compress_output(cinfo, input_buf);
-}
-
-
-/*
- * Process some data in subsequent passes of a multi-pass case.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the scan.
- * The data is obtained from the virtual arrays and fed to the entropy coder.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf is ignored; it is likely to be a NULL pointer.
- */
-
-METHODDEF(boolean)
-compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  int ci, xindex, yindex, yoffset;
-  JDIMENSION start_col;
-  JBLOCKARRAY blkp;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan.
-   * NB: during first pass, this is safe only because the buffers will
-   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
-   */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkp = coef->MCU_buffer;	/* pointer to current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
-	  xindex = compptr->MCU_width;
-	  do {
-	    *blkp++ = buffer_ptr++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to write the MCU. */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-#endif /* FULL_COEF_BUFFER_SUPPORTED */
-
-
-/*
- * Initialize coefficient buffer controller.
- */
-
-GLOBAL(void)
-jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_coef_ptr coef;
-
-  if (need_full_buffer) {
-#ifdef FULL_COEF_BUFFER_SUPPORTED
-    /* Allocate a full-image virtual array for each component, */
-    /* padded to a multiple of samp_factor DCT blocks in each direction. */
-    int ci;
-    jpeg_component_info *compptr;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       SIZEOF(my_coef_controller) - SIZEOF(coef->blk_buffer));
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) compptr->v_samp_factor);
-    }
-#else
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-  } else {
-    /* We only need a single-MCU buffer. */
-    JBLOCKARRAY blkp;
-    JBLOCKROW buffer_ptr;
-    int bi;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
-    blkp = coef->MCU_buffer;
-    buffer_ptr = coef->blk_buffer;
-    bi = C_MAX_BLOCKS_IN_MCU;
-    do {
-      *blkp++ = buffer_ptr++;
-    } while (--bi);
-    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
-  }
-
-  coef->pub.start_pass = start_pass_coef;
-  cinfo->coef = &coef->pub;
-}
diff --git a/cmake/externals/libjpeg/jccolor.c b/cmake/externals/libjpeg/jccolor.c
deleted file mode 100644
index c028dd9db..000000000
--- a/cmake/externals/libjpeg/jccolor.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * jccolor.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains input colorspace conversion routines.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_converter pub; /* public fields */
-
-  /* Private state for RGB->YCC conversion */
-  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
-} my_color_converter;
-
-typedef my_color_converter * my_cconvert_ptr;
-
-
-/**************** RGB -> YCbCr conversion: most common case **************/
-
-/*
- * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
- * previously known as Recommendation CCIR 601-1, except that Cb and Cr
- * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
- * sYCC (standard luma-chroma-chroma color space with extended gamut)
- * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
- * bg-sRGB and bg-sYCC (big gamut standard color spaces)
- * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
- * Note that the derived conversion coefficients given in some of these
- * documents are imprecise.  The general conversion equations are
- *	Y  = Kr * R + (1 - Kr - Kb) * G + Kb * B
- *	Cb = (B - Y) / (1 - Kb) / K
- *	Cr = (R - Y) / (1 - Kr) / K
- * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
- * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
- * the conversion equations to be implemented are therefore
- *	Y  =  0.299 * R + 0.587 * G + 0.114 * B
- *	Cb = -0.168735892 * R - 0.331264108 * G + 0.5 * B + CENTERJSAMPLE
- *	Cr =  0.5 * R - 0.418687589 * G - 0.081312411 * B + CENTERJSAMPLE
- * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
- * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
- * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
- * were not represented exactly.  Now we sacrifice exact representation of
- * maximum red and maximum blue in order to get exact grayscales.
- *
- * To avoid floating-point arithmetic, we represent the fractional constants
- * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
- * the products by 2^16, with appropriate rounding, to get the correct answer.
- *
- * For even more speed, we avoid doing any multiplications in the inner loop
- * by precalculating the constants times R,G,B for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 9-bit to 12-bit samples it is still acceptable.  It's not very
- * reasonable for 16-bit samples, but if you want lossless storage
- * you shouldn't be changing colorspace anyway.
- * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
- * in the tables to save adding them separately in the inner loop.
- */
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-/* We allocate one big table and divide it up into eight parts, instead of
- * doing eight alloc_small requests.  This lets us use a single table base
- * address, which can be held in a register in the inner loops on many
- * machines (more than can hold all eight addresses, anyway).
- */
-
-#define R_Y_OFF		0			/* offset to R => Y section */
-#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
-#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
-#define R_CB_OFF	(3*(MAXJSAMPLE+1))
-#define G_CB_OFF	(4*(MAXJSAMPLE+1))
-#define B_CB_OFF	(5*(MAXJSAMPLE+1))
-#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
-#define G_CR_OFF	(6*(MAXJSAMPLE+1))
-#define B_CR_OFF	(7*(MAXJSAMPLE+1))
-#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
-
-
-/*
- * Initialize for RGB->YCC colorspace conversion.
- */
-
-METHODDEF(void)
-rgb_ycc_start (j_compress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 * rgb_ycc_tab;
-  INT32 i;
-
-  /* Allocate and fill in the conversion tables. */
-  cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				TABLE_SIZE * SIZEOF(INT32));
-
-  for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.299) * i;
-    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.587) * i;
-    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.114) * i + ONE_HALF;
-    rgb_ycc_tab[i+R_CB_OFF] = (- FIX(0.168735892)) * i;
-    rgb_ycc_tab[i+G_CB_OFF] = (- FIX(0.331264108)) * i;
-    /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
-     * This ensures that the maximum output will round to MAXJSAMPLE
-     * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
-     */
-    rgb_ycc_tab[i+B_CB_OFF] = (i << (SCALEBITS-1)) + CBCR_OFFSET + ONE_HALF-1;
-/*  B=>Cb and R=>Cr tables are the same
-    rgb_ycc_tab[i+R_CR_OFF] = (i << (SCALEBITS-1)) + CBCR_OFFSET + ONE_HALF-1;
-*/
-    rgb_ycc_tab[i+G_CR_OFF] = (- FIX(0.418687589)) * i;
-    rgb_ycc_tab[i+B_CR_OFF] = (- FIX(0.081312411)) * i;
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- *
- * Note that we change from the application's interleaved-pixel format
- * to our internal noninterleaved, one-plane-per-component format.  The
- * input buffer is therefore three times as wide as the output buffer.
- *
- * A starting row offset is provided only for the output buffer.  The
- * caller can easily adjust the passed input_buf value to accommodate
- * any row offset required on that side.
- */
-
-METHODDEF(void)
-rgb_ycc_convert (j_compress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		 JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/**************** Cases other than RGB -> YCbCr **************/
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles RGB->grayscale conversion,
- * which is the same as the RGB->Y portion of RGB->YCbCr.
- * We assume rgb_ycc_start has been called (we only use the Y tables).
- */
-
-METHODDEF(void)
-rgb_gray_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register INT32 y;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row++];
-    for (col = 0; col < num_cols; col++) {
-      y  = ctab[R_Y_OFF + GETJSAMPLE(inptr[RGB_RED])];
-      y += ctab[G_Y_OFF + GETJSAMPLE(inptr[RGB_GREEN])];
-      y += ctab[B_Y_OFF + GETJSAMPLE(inptr[RGB_BLUE])];
-      inptr += RGB_PIXELSIZE;
-      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles Adobe-style CMYK->YCCK conversion,
- * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the
- * same conversion as above, while passing K (black) unchanged.
- * We assume rgb_ycc_start has been called.
- */
-
-METHODDEF(void)
-cmyk_ycck_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2, outptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    outptr3 = output_buf[3][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = MAXJSAMPLE - GETJSAMPLE(inptr[0]);
-      g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
-      b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
-      /* K passes through as-is */
-      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
-      inptr += 4;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * [R,G,B] to [R-G,G,B-G] conversion with modulo calculation
- * (forward reversible color transform).
- * This can be seen as an adaption of the general RGB->YCbCr
- * conversion equation with Kr = Kb = 0, while replacing the
- * normalization by modulo calculation.
- */
-
-METHODDEF(void)
-rgb_rgb1_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
-{
-  register int r, g, b;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
-       * (modulo) operator is equivalent to the bitmask operator AND.
-       */
-      outptr0[col] = (JSAMPLE) ((r - g + CENTERJSAMPLE) & MAXJSAMPLE);
-      outptr1[col] = (JSAMPLE) g;
-      outptr2[col] = (JSAMPLE) ((b - g + CENTERJSAMPLE) & MAXJSAMPLE);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles grayscale output with no conversion.
- * The source can be either plain grayscale or YCC (since Y == gray).
- */
-
-METHODDEF(void)
-grayscale_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION count;
-  register int instride = cinfo->input_components;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row++];
-    for (count = num_cols; count > 0; count--) {
-      *outptr++ = *inptr;	/* don't need GETJSAMPLE() here */
-      inptr += instride;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * No colorspace conversion, but change from interleaved
- * to separate-planes representation.
- */
-
-METHODDEF(void)
-rgb_convert (j_compress_ptr cinfo,
-	     JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	     JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr0[col] = inptr[RGB_RED];
-      outptr1[col] = inptr[RGB_GREEN];
-      outptr2[col] = inptr[RGB_BLUE];
-      inptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the JPEG colorspace.
- * This version handles multi-component colorspaces without conversion.
- * We assume input_components == num_components.
- */
-
-METHODDEF(void)
-null_convert (j_compress_ptr cinfo,
-	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	      JDIMENSION output_row, int num_rows)
-{
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION count;
-  register int num_comps = cinfo->num_components;
-  JDIMENSION num_cols = cinfo->image_width;
-  int ci;
-
-  while (--num_rows >= 0) {
-    /* It seems fastest to make a separate pass for each component. */
-    for (ci = 0; ci < num_comps; ci++) {
-      inptr = input_buf[0] + ci;
-      outptr = output_buf[ci][output_row];
-      for (count = num_cols; count > 0; count--) {
-	*outptr++ = *inptr;	/* don't need GETJSAMPLE() here */
-	inptr += num_comps;
-      }
-    }
-    input_buf++;
-    output_row++;
-  }
-}
-
-
-/*
- * Empty method for start_pass.
- */
-
-METHODDEF(void)
-null_method (j_compress_ptr cinfo)
-{
-  /* no work needed */
-}
-
-
-/*
- * Module initialization routine for input colorspace conversion.
- */
-
-GLOBAL(void)
-jinit_color_converter (j_compress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert;
-
-  cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_color_converter));
-  cinfo->cconvert = &cconvert->pub;
-  /* set start_pass to null method until we find out differently */
-  cconvert->pub.start_pass = null_method;
-
-  /* Make sure input_components agrees with in_color_space */
-  switch (cinfo->in_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->input_components != 1)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  case JCS_RGB:
-  case JCS_BG_RGB:
-#if RGB_PIXELSIZE != 3
-    if (cinfo->input_components != RGB_PIXELSIZE)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-#endif /* else share code with YCbCr */
-
-  case JCS_YCbCr:
-  case JCS_BG_YCC:
-    if (cinfo->input_components != 3)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  case JCS_CMYK:
-  case JCS_YCCK:
-    if (cinfo->input_components != 4)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-    break;
-
-  default:			/* JCS_UNKNOWN can be anything */
-    if (cinfo->input_components < 1)
-      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-  }
-
-  /* Support color transform only for RGB colorspaces */
-  if (cinfo->color_transform &&
-      cinfo->jpeg_color_space != JCS_RGB &&
-      cinfo->jpeg_color_space != JCS_BG_RGB)
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-
-  /* Check num_components, set conversion method based on requested space */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->num_components != 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_GRAYSCALE:
-    case JCS_YCbCr:
-    case JCS_BG_YCC:
-      cconvert->pub.color_convert = grayscale_convert;
-      break;
-    case JCS_RGB:
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_gray_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_RGB:
-  case JCS_BG_RGB:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space != cinfo->jpeg_color_space)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    switch (cinfo->color_transform) {
-    case JCT_NONE:
-      cconvert->pub.color_convert = rgb_convert;
-      break;
-    case JCT_SUBTRACT_GREEN:
-      cconvert->pub.color_convert = rgb_rgb1_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_YCbCr:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_RGB:
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_ycc_convert;
-      break;
-    case JCS_YCbCr:
-      cconvert->pub.color_convert = null_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_BG_YCC:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_RGB:
-      /* For conversion from normal RGB input to BG_YCC representation,
-       * the Cb/Cr values are first computed as usual, and then
-       * quantized further after DCT processing by a factor of
-       * 2 in reference to the nominal quantization factor.
-       */
-      /* need quantization scale by factor of 2 after DCT */
-      cinfo->comp_info[1].component_needed = TRUE;
-      cinfo->comp_info[2].component_needed = TRUE;
-      /* compute normal YCC first */
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_ycc_convert;
-      break;
-    case JCS_YCbCr:
-      /* need quantization scale by factor of 2 after DCT */
-      cinfo->comp_info[1].component_needed = TRUE;
-      cinfo->comp_info[2].component_needed = TRUE;
-      /*FALLTHROUGH*/
-    case JCS_BG_YCC:
-      /* Pass through for BG_YCC input */
-      cconvert->pub.color_convert = null_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_CMYK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space != JCS_CMYK)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cconvert->pub.color_convert = null_convert;
-    break;
-
-  case JCS_YCCK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    switch (cinfo->in_color_space) {
-    case JCS_CMYK:
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = cmyk_ycck_convert;
-      break;
-    case JCS_YCCK:
-      cconvert->pub.color_convert = null_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  default:			/* allow null conversion of JCS_UNKNOWN */
-    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
-	cinfo->num_components != cinfo->input_components)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cconvert->pub.color_convert = null_convert;
-  }
-}
diff --git a/cmake/externals/libjpeg/jcdctmgr.c b/cmake/externals/libjpeg/jcdctmgr.c
deleted file mode 100644
index a48ccd814..000000000
--- a/cmake/externals/libjpeg/jcdctmgr.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * jcdctmgr.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the forward-DCT management logic.
- * This code selects a particular DCT implementation to be used,
- * and it performs related housekeeping chores including coefficient
- * quantization.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-
-/* Private subobject for this module */
-
-typedef struct {
-  struct jpeg_forward_dct pub;	/* public fields */
-
-  /* Pointer to the DCT routine actually in use */
-  forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
-
-#ifdef DCT_FLOAT_SUPPORTED
-  /* Same as above for the floating-point case. */
-  float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
-#endif
-} my_fdct_controller;
-
-typedef my_fdct_controller * my_fdct_ptr;
-
-
-/* The allocated post-DCT divisor tables -- big enough for any
- * supported variant and not identical to the quant table entries,
- * because of scaling (especially for an unnormalized DCT) --
- * are pointed to by dct_table in the per-component comp_info
- * structures.  Each table is given in normal array order.
- */
-
-typedef union {
-  DCTELEM int_array[DCTSIZE2];
-#ifdef DCT_FLOAT_SUPPORTED
-  FAST_FLOAT float_array[DCTSIZE2];
-#endif
-} divisor_table;
-
-
-/* The current scaled-DCT routines require ISLOW-style divisor tables,
- * so be sure to compile that code if either ISLOW or SCALING is requested.
- */
-#ifdef DCT_ISLOW_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#else
-#ifdef DCT_SCALING_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#endif
-#endif
-
-
-/*
- * Perform forward DCT on one or more blocks of a component.
- *
- * The input samples are taken from the sample_data[] array starting at
- * position start_col, and moving to the right for any additional blocks.
- * The quantized coefficients are returned in coef_blocks[].
- */
-
-METHODDEF(void)
-forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
-	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-	     JDIMENSION start_col, JDIMENSION num_blocks)
-/* This version is used for integer DCT implementations. */
-{
-  /* This routine is heavily used, so it's worth coding it tightly. */
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
-  DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
-  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
-  JDIMENSION bi;
-
-  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
-    /* Perform the DCT */
-    (*do_dct) (workspace, sample_data, start_col);
-
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register DCTELEM temp, qval;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	qval = divisors[i];
-	temp = workspace[i];
-	/* Divide the coefficient value by qval, ensuring proper rounding.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 *
-	 * In most files, at least half of the output values will be zero
-	 * (at default quantization settings, more like three-quarters...)
-	 * so we should ensure that this case is fast.  On many machines,
-	 * a comparison is enough cheaper than a divide to make a special test
-	 * a win.  Since both inputs will be nonnegative, we need only test
-	 * for a < b to discover whether a/b is 0.
-	 * If your machine's division is fast enough, define FAST_DIVIDE.
-	 */
-#ifdef FAST_DIVIDE
-#define DIVIDE_BY(a,b)	a /= b
-#else
-#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
-#endif
-	if (temp < 0) {
-	  temp = -temp;
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	  temp = -temp;
-	} else {
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	}
-	output_ptr[i] = (JCOEF) temp;
-      }
-    }
-  }
-}
-
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-METHODDEF(void)
-forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		   JDIMENSION start_col, JDIMENSION num_blocks)
-/* This version is used for floating-point DCT implementations. */
-{
-  /* This routine is heavily used, so it's worth coding it tightly. */
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
-  FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
-  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
-  JDIMENSION bi;
-
-  for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
-    /* Perform the DCT */
-    (*do_dct) (workspace, sample_data, start_col);
-
-    /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register FAST_FLOAT temp;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	/* Apply the quantization and scaling factor */
-	temp = workspace[i] * divisors[i];
-	/* Round to nearest integer.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 * The maximum coefficient size is +-16K (for 12-bit data), so this
-	 * code should work for either 16-bit or 32-bit ints.
-	 */
-	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
-      }
-    }
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
-
-
-/*
- * Initialize for a processing pass.
- * Verify that all referenced Q-tables are present, and set up
- * the divisor table for each one.
- * In the current implementation, DCT of all components is done during
- * the first pass, even if only some components will be output in the
- * first scan.  Hence all components should be examined here.
- */
-
-METHODDEF(void)
-start_pass_fdctmgr (j_compress_ptr cinfo)
-{
-  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  int ci, qtblno, i;
-  jpeg_component_info *compptr;
-  int method = 0;
-  JQUANT_TBL * qtbl;
-  DCTELEM * dtbl;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Select the proper DCT routine for this component's scaling */
-    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
-#ifdef DCT_SCALING_SUPPORTED
-    case ((1 << 8) + 1):
-      fdct->do_dct[ci] = jpeg_fdct_1x1;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_2x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((3 << 8) + 3):
-      fdct->do_dct[ci] = jpeg_fdct_3x3;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_4x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((5 << 8) + 5):
-      fdct->do_dct[ci] = jpeg_fdct_5x5;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_6x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((7 << 8) + 7):
-      fdct->do_dct[ci] = jpeg_fdct_7x7;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((9 << 8) + 9):
-      fdct->do_dct[ci] = jpeg_fdct_9x9;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((10 << 8) + 10):
-      fdct->do_dct[ci] = jpeg_fdct_10x10;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((11 << 8) + 11):
-      fdct->do_dct[ci] = jpeg_fdct_11x11;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((12 << 8) + 12):
-      fdct->do_dct[ci] = jpeg_fdct_12x12;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((13 << 8) + 13):
-      fdct->do_dct[ci] = jpeg_fdct_13x13;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((14 << 8) + 14):
-      fdct->do_dct[ci] = jpeg_fdct_14x14;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((15 << 8) + 15):
-      fdct->do_dct[ci] = jpeg_fdct_15x15;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((16 << 8) + 16):
-      fdct->do_dct[ci] = jpeg_fdct_16x16;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((16 << 8) + 8):
-      fdct->do_dct[ci] = jpeg_fdct_16x8;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((14 << 8) + 7):
-      fdct->do_dct[ci] = jpeg_fdct_14x7;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((12 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_12x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((10 << 8) + 5):
-      fdct->do_dct[ci] = jpeg_fdct_10x5;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((8 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_8x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 3):
-      fdct->do_dct[ci] = jpeg_fdct_6x3;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_4x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 1):
-      fdct->do_dct[ci] = jpeg_fdct_2x1;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((8 << 8) + 16):
-      fdct->do_dct[ci] = jpeg_fdct_8x16;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((7 << 8) + 14):
-      fdct->do_dct[ci] = jpeg_fdct_7x14;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((6 << 8) + 12):
-      fdct->do_dct[ci] = jpeg_fdct_6x12;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((5 << 8) + 10):
-      fdct->do_dct[ci] = jpeg_fdct_5x10;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((4 << 8) + 8):
-      fdct->do_dct[ci] = jpeg_fdct_4x8;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((3 << 8) + 6):
-      fdct->do_dct[ci] = jpeg_fdct_3x6;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((2 << 8) + 4):
-      fdct->do_dct[ci] = jpeg_fdct_2x4;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-    case ((1 << 8) + 2):
-      fdct->do_dct[ci] = jpeg_fdct_1x2;
-      method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
-      break;
-#endif
-    case ((DCTSIZE << 8) + DCTSIZE):
-      switch (cinfo->dct_method) {
-#ifdef DCT_ISLOW_SUPPORTED
-      case JDCT_ISLOW:
-	fdct->do_dct[ci] = jpeg_fdct_islow;
-	method = JDCT_ISLOW;
-	break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-      case JDCT_IFAST:
-	fdct->do_dct[ci] = jpeg_fdct_ifast;
-	method = JDCT_IFAST;
-	break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-      case JDCT_FLOAT:
-	fdct->do_float_dct[ci] = jpeg_fdct_float;
-	method = JDCT_FLOAT;
-	break;
-#endif
-      default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-      }
-      break;
-    default:
-      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
-    }
-    qtblno = compptr->quant_tbl_no;
-    /* Make sure specified quantization table is present */
-    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
-      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
-    qtbl = cinfo->quant_tbl_ptrs[qtblno];
-    /* Create divisor table from quant table */
-    switch (method) {
-#ifdef PROVIDE_ISLOW_TABLES
-    case JDCT_ISLOW:
-      /* For LL&M IDCT method, divisors are equal to raw quantization
-       * coefficients multiplied by 8 (to counteract scaling).
-       */
-      dtbl = (DCTELEM *) compptr->dct_table;
-      for (i = 0; i < DCTSIZE2; i++) {
-	dtbl[i] =
-	  ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT;
-      break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-    case JDCT_IFAST:
-      {
-	/* For AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 */
-#define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
-
-	dtbl = (DCTELEM *) compptr->dct_table;
-	for (i = 0; i < DCTSIZE2; i++) {
-	  dtbl[i] = (DCTELEM)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
-	}
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT;
-      break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-    case JDCT_FLOAT:
-      {
-	/* For float AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 * What's actually stored is 1/divisor so that the inner loop can
-	 * use a multiplication rather than a division.
-	 */
-	FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
-
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fdtbl[i] = (FAST_FLOAT)
-	      (1.0 / ((double) qtbl->quantval[i] *
-		      aanscalefactor[row] * aanscalefactor[col] *
-		      (compptr->component_needed ? 16.0 : 8.0)));
-	    i++;
-	  }
-	}
-      }
-      fdct->pub.forward_DCT[ci] = forward_DCT_float;
-      break;
-#endif
-    default:
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-    }
-  }
-}
-
-
-/*
- * Initialize FDCT manager.
- */
-
-GLOBAL(void)
-jinit_forward_dct (j_compress_ptr cinfo)
-{
-  my_fdct_ptr fdct;
-  int ci;
-  jpeg_component_info *compptr;
-
-  fdct = (my_fdct_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_fdct_controller));
-  cinfo->fdct = &fdct->pub;
-  fdct->pub.start_pass = start_pass_fdctmgr;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate a divisor table for each component */
-    compptr->dct_table = (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(divisor_table));
-  }
-}
diff --git a/cmake/externals/libjpeg/jchuff.c b/cmake/externals/libjpeg/jchuff.c
deleted file mode 100644
index 1f527b218..000000000
--- a/cmake/externals/libjpeg/jchuff.c
+++ /dev/null
@@ -1,1656 +0,0 @@
-/*
- * jchuff.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains Huffman entropy encoding routines.
- * Both sequential and progressive modes are supported in this single module.
- *
- * Much of the complexity here has to do with supporting output suspension.
- * If the data destination module demands suspension, we want to be able to
- * back up to the start of the current MCU.  To do this, we copy state
- * variables into local working storage, and update them back to the
- * permanent JPEG objects only upon successful completion of an MCU.
- *
- * We do not support output suspension for the progressive JPEG mode, since
- * the library currently does not allow multiple-scan files to be written
- * with output suspension.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* The legal range of a DCT coefficient is
- *  -1024 .. +1023  for 8-bit sample data precision;
- * -16384 .. +16383 for 12-bit sample data precision.
- * Hence the magnitude should always fit in sample data precision + 2 bits.
- */
-
-/* Derived data constructed for each Huffman table */
-
-typedef struct {
-  unsigned int ehufco[256];	/* code for each symbol */
-  char ehufsi[256];		/* length of code for each symbol */
-  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
-} c_derived_tbl;
-
-
-/* Expanded entropy encoder object for Huffman encoding.
- *
- * The savable_state subrecord contains fields that change within an MCU,
- * but must not be updated permanently until we complete the MCU.
- */
-
-typedef struct {
-  INT32 put_buffer;		/* current bit-accumulation buffer */
-  int put_bits;			/* # of bits now in it */
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-} savable_state;
-
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest,src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest,src)  \
-	((dest).put_buffer = (src).put_buffer, \
-	 (dest).put_bits = (src).put_bits, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
-typedef struct {
-  struct jpeg_entropy_encoder pub; /* public fields */
-
-  savable_state saved;		/* Bit buffer & DC state at start of MCU */
-
-  /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
-  c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
-
-  /* Statistics tables for optimization */
-  long * dc_count_ptrs[NUM_HUFF_TBLS];
-  long * ac_count_ptrs[NUM_HUFF_TBLS];
-
-  /* Following fields used only in progressive mode */
-
-  /* Mode flag: TRUE for optimization, FALSE for actual data output */
-  boolean gather_statistics;
-
-  /* next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
-   */
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
-
-  /* Coding status for AC components */
-  int ac_tbl_no;		/* the table number of the single component */
-  unsigned int EOBRUN;		/* run length of EOBs */
-  unsigned int BE;		/* # of buffered correction bits before MCU */
-  char * bit_buffer;		/* buffer for correction bits (1 per char) */
-  /* packing correction bits tightly would save some space but cost time... */
-} huff_entropy_encoder;
-
-typedef huff_entropy_encoder * huff_entropy_ptr;
-
-/* Working state while writing an MCU (sequential mode).
- * This struct contains all the fields that are needed by subroutines.
- */
-
-typedef struct {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  savable_state cur;		/* Current bit buffer & DC state */
-  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
-} working_state;
-
-/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
- * buffer can hold.  Larger sizes may slightly improve compression, but
- * 1000 is already well into the realm of overkill.
- * The minimum safe size is 64 bits.
- */
-
-#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
-
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
- * which should be safe.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
-#define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-
-/*
- * Compute the derived values for a Huffman table.
- * This routine also performs some validation checks on the table.
- */
-
-LOCAL(void)
-jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
-			 c_derived_tbl ** pdtbl)
-{
-  JHUFF_TBL *htbl;
-  c_derived_tbl *dtbl;
-  int p, i, l, lastp, si, maxsymbol;
-  char huffsize[257];
-  unsigned int huffcode[257];
-  unsigned int code;
-
-  /* Note that huffsize[] and huffcode[] are filled in code-length order,
-   * paralleling the order of the symbols themselves in htbl->huffval[].
-   */
-
-  /* Find the input Huffman table */
-  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-  htbl =
-    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
-  if (htbl == NULL)
-    htbl = jpeg_std_huff_table((j_common_ptr) cinfo, isDC, tblno);
-
-  /* Allocate a workspace if we haven't already done so. */
-  if (*pdtbl == NULL)
-    *pdtbl = (c_derived_tbl *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(c_derived_tbl));
-  dtbl = *pdtbl;
-  
-  /* Figure C.1: make table of Huffman code length for each symbol */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    while (i--)
-      huffsize[p++] = (char) l;
-  }
-  huffsize[p] = 0;
-  lastp = p;
-  
-  /* Figure C.2: generate the codes themselves */
-  /* We also validate that the counts represent a legal Huffman code tree. */
-
-  code = 0;
-  si = huffsize[0];
-  p = 0;
-  while (huffsize[p]) {
-    while (((int) huffsize[p]) == si) {
-      huffcode[p++] = code;
-      code++;
-    }
-    /* code is now 1 more than the last code used for codelength si; but
-     * it must still fit in si bits, since no code is allowed to be all ones.
-     */
-    if (((INT32) code) >= (((INT32) 1) << si))
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    code <<= 1;
-    si++;
-  }
-  
-  /* Figure C.3: generate encoding tables */
-  /* These are code and size indexed by symbol value */
-
-  /* Set all codeless symbols to have code length 0;
-   * this lets us detect duplicate VAL entries here, and later
-   * allows emit_bits to detect any attempt to emit such symbols.
-   */
-  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
-
-  /* This is also a convenient place to check for out-of-range
-   * and duplicated VAL entries.  We allow 0..255 for AC symbols
-   * but only 0..15 for DC.  (We could constrain them further
-   * based on data depth and mode, but this seems enough.)
-   */
-  maxsymbol = isDC ? 15 : 255;
-
-  for (p = 0; p < lastp; p++) {
-    i = htbl->huffval[p];
-    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    dtbl->ehufco[i] = huffcode[p];
-    dtbl->ehufsi[i] = huffsize[p];
-  }
-}
-
-
-/* Outputting bytes to the file.
- * NB: these must be called only when actually outputting,
- * that is, entropy->gather_statistics == FALSE.
- */
-
-/* Emit a byte, taking 'action' if must suspend. */
-#define emit_byte_s(state,val,action)  \
-	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(state)->free_in_buffer == 0)  \
-	    if (! dump_buffer_s(state))  \
-	      { action; } }
-
-/* Emit a byte */
-#define emit_byte_e(entropy,val)  \
-	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(entropy)->free_in_buffer == 0)  \
-	    dump_buffer_e(entropy); }
-
-
-LOCAL(boolean)
-dump_buffer_s (working_state * state)
-/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
-{
-  struct jpeg_destination_mgr * dest = state->cinfo->dest;
-
-  if (! (*dest->empty_output_buffer) (state->cinfo))
-    return FALSE;
-  /* After a successful buffer dump, must reset buffer pointers */
-  state->next_output_byte = dest->next_output_byte;
-  state->free_in_buffer = dest->free_in_buffer;
-  return TRUE;
-}
-
-
-LOCAL(void)
-dump_buffer_e (huff_entropy_ptr entropy)
-/* Empty the output buffer; we do not support suspension in this case. */
-{
-  struct jpeg_destination_mgr * dest = entropy->cinfo->dest;
-
-  if (! (*dest->empty_output_buffer) (entropy->cinfo))
-    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
-  /* After a successful buffer dump, must reset buffer pointers */
-  entropy->next_output_byte = dest->next_output_byte;
-  entropy->free_in_buffer = dest->free_in_buffer;
-}
-
-
-/* Outputting bits to the file */
-
-/* Only the right 24 bits of put_buffer are used; the valid bits are
- * left-justified in this part.  At most 16 bits can be passed to emit_bits
- * in one call, and we never retain more than 7 bits in put_buffer
- * between calls, so 24 bits are sufficient.
- */
-
-INLINE
-LOCAL(boolean)
-emit_bits_s (working_state * state, unsigned int code, int size)
-/* Emit some bits; return TRUE if successful, FALSE if must suspend */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer;
-  register int put_bits;
-
-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
-
-  /* mask off any extra bits in code */
-  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
-
-  /* new number of bits in buffer */
-  put_bits = size + state->cur.put_bits;
-
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  /* and merge with old buffer contents */
-  put_buffer |= state->cur.put_buffer;
-
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-
-    emit_byte_s(state, c, return FALSE);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte_s(state, 0, return FALSE);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  state->cur.put_buffer = put_buffer; /* update state variables */
-  state->cur.put_bits = put_bits;
-
-  return TRUE;
-}
-
-
-INLINE
-LOCAL(void)
-emit_bits_e (huff_entropy_ptr entropy, unsigned int code, int size)
-/* Emit some bits, unless we are in gather mode */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer;
-  register int put_bits;
-
-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
-
-  if (entropy->gather_statistics)
-    return;			/* do nothing if we're only getting stats */
-
-  /* mask off any extra bits in code */
-  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
-
-  /* new number of bits in buffer */
-  put_bits = size + entropy->saved.put_bits;
-
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  /* and merge with old buffer contents */
-  put_buffer |= entropy->saved.put_buffer;
-
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-
-    emit_byte_e(entropy, c);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte_e(entropy, 0);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  entropy->saved.put_buffer = put_buffer; /* update variables */
-  entropy->saved.put_bits = put_bits;
-}
-
-
-LOCAL(boolean)
-flush_bits_s (working_state * state)
-{
-  if (! emit_bits_s(state, 0x7F, 7)) /* fill any partial byte with ones */
-    return FALSE;
-  state->cur.put_buffer = 0;	     /* and reset bit-buffer to empty */
-  state->cur.put_bits = 0;
-  return TRUE;
-}
-
-
-LOCAL(void)
-flush_bits_e (huff_entropy_ptr entropy)
-{
-  emit_bits_e(entropy, 0x7F, 7); /* fill any partial byte with ones */
-  entropy->saved.put_buffer = 0; /* and reset bit-buffer to empty */
-  entropy->saved.put_bits = 0;
-}
-
-
-/*
- * Emit (or just count) a Huffman symbol.
- */
-
-INLINE
-LOCAL(void)
-emit_dc_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
-{
-  if (entropy->gather_statistics)
-    entropy->dc_count_ptrs[tbl_no][symbol]++;
-  else {
-    c_derived_tbl * tbl = entropy->dc_derived_tbls[tbl_no];
-    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
-  }
-}
-
-
-INLINE
-LOCAL(void)
-emit_ac_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol)
-{
-  if (entropy->gather_statistics)
-    entropy->ac_count_ptrs[tbl_no][symbol]++;
-  else {
-    c_derived_tbl * tbl = entropy->ac_derived_tbls[tbl_no];
-    emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
-  }
-}
-
-
-/*
- * Emit bits from a correction bit buffer.
- */
-
-LOCAL(void)
-emit_buffered_bits (huff_entropy_ptr entropy, char * bufstart,
-		    unsigned int nbits)
-{
-  if (entropy->gather_statistics)
-    return;			/* no real work */
-
-  while (nbits > 0) {
-    emit_bits_e(entropy, (unsigned int) (*bufstart), 1);
-    bufstart++;
-    nbits--;
-  }
-}
-
-
-/*
- * Emit any pending EOBRUN symbol.
- */
-
-LOCAL(void)
-emit_eobrun (huff_entropy_ptr entropy)
-{
-  register int temp, nbits;
-
-  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
-    temp = entropy->EOBRUN;
-    nbits = 0;
-    while ((temp >>= 1))
-      nbits++;
-    /* safety check: shouldn't happen given limited correction-bit buffer */
-    if (nbits > 14)
-      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
-
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
-    if (nbits)
-      emit_bits_e(entropy, entropy->EOBRUN, nbits);
-
-    entropy->EOBRUN = 0;
-
-    /* Emit any buffered correction bits */
-    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
-    entropy->BE = 0;
-  }
-}
-
-
-/*
- * Emit a restart marker & resynchronize predictions.
- */
-
-LOCAL(boolean)
-emit_restart_s (working_state * state, int restart_num)
-{
-  int ci;
-
-  if (! flush_bits_s(state))
-    return FALSE;
-
-  emit_byte_s(state, 0xFF, return FALSE);
-  emit_byte_s(state, JPEG_RST0 + restart_num, return FALSE);
-
-  /* Re-initialize DC predictions to 0 */
-  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
-    state->cur.last_dc_val[ci] = 0;
-
-  /* The restart counter is not updated until we successfully write the MCU. */
-
-  return TRUE;
-}
-
-
-LOCAL(void)
-emit_restart_e (huff_entropy_ptr entropy, int restart_num)
-{
-  int ci;
-
-  emit_eobrun(entropy);
-
-  if (! entropy->gather_statistics) {
-    flush_bits_e(entropy);
-    emit_byte_e(entropy, 0xFF);
-    emit_byte_e(entropy, JPEG_RST0 + restart_num);
-  }
-
-  if (entropy->cinfo->Ss == 0) {
-    /* Re-initialize DC predictions to 0 */
-    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
-      entropy->saved.last_dc_val[ci] = 0;
-  } else {
-    /* Re-initialize all AC-related fields to 0 */
-    entropy->EOBRUN = 0;
-    entropy->BE = 0;
-  }
-}
-
-
-/*
- * MCU encoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp, temp2;
-  register int nbits;
-  int max_coef_bits;
-  int blkn, ci, tbl;
-  ISHIFT_TEMPS
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  /* Since we're encoding a difference, the range limit is twice as much. */
-  max_coef_bits = cinfo->data_precision + 3;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Compute the DC value after the required point transform by Al.
-     * This is simply an arithmetic right shift.
-     */
-    temp = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
-
-    /* DC differences are figured on the point-transformed values. */
-    if ((temp2 = temp - entropy->saved.last_dc_val[ci]) == 0) {
-      /* Count/emit the Huffman-coded symbol for the number of bits */
-      emit_dc_symbol(entropy, tbl, 0);
-
-      continue;
-    }
-
-    entropy->saved.last_dc_val[ci] = temp;
-
-    /* Encode the DC coefficient difference per section G.1.2.1 */
-    if ((temp = temp2) < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* For a negative input, want temp2 = bitwise complement of abs(input) */
-      /* This code assumes we are on a two's complement machine */
-      temp2--;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values */
-    if (nbits > max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count/emit the Huffman-coded symbol for the number of bits */
-    emit_dc_symbol(entropy, tbl, nbits);
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    emit_bits_e(entropy, (unsigned int) temp2, nbits);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  register int temp, temp2;
-  register int nbits;
-  register int r, k;
-  int Se, Al, max_coef_bits;
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Se = cinfo->Se;
-  Al = cinfo->Al;
-  natural_order = cinfo->natural_order;
-  max_coef_bits = cinfo->data_precision + 2;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-
-  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
-  
-  r = 0;			/* r = run length of zeros */
-   
-  for (k = cinfo->Ss; k <= Se; k++) {
-    if ((temp = (*block)[natural_order[k]]) == 0) {
-      r++;
-      continue;
-    }
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value; so the code is
-     * interwoven with finding the abs value (temp) and output bits (temp2).
-     */
-    if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* Apply the point transform, and watch out for case */
-      /* that nonzero coef is zero after point transform. */
-      if ((temp >>= Al) == 0) {
-	r++;
-	continue;
-      }
-      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
-      temp2 = ~temp;
-    } else {
-      /* Apply the point transform, and watch out for case */
-      /* that nonzero coef is zero after point transform. */
-      if ((temp >>= Al) == 0) {
-	r++;
-	continue;
-      }
-      temp2 = temp;
-    }
-
-    /* Emit any pending EOBRUN */
-    if (entropy->EOBRUN > 0)
-      emit_eobrun(entropy);
-    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-    while (r > 15) {
-      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
-      r -= 16;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values */
-    if (nbits > max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count/emit Huffman symbol for run length / number of bits */
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    emit_bits_e(entropy, (unsigned int) temp2, nbits);
-
-    r = 0;			/* reset zero run length */
-  }
-
-  if (r > 0) {			/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    if (entropy->EOBRUN == 0x7FFF)
-      emit_eobrun(entropy);	/* force it out to avoid overflow */
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int Al, blkn;
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Al = cinfo->Al;
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* We simply emit the Al'th bit of the DC coefficient value. */
-    emit_bits_e(entropy, (unsigned int) (MCU_data[blkn][0][0] >> Al), 1);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU encoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  JBLOCKROW block;
-  register int temp;
-  register int r, k;
-  int Se, Al;
-  int EOB;
-  char *BR_buffer;
-  unsigned int BR;
-  int absvalues[DCTSIZE2];
-
-  entropy->next_output_byte = cinfo->dest->next_output_byte;
-  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval)
-    if (entropy->restarts_to_go == 0)
-      emit_restart_e(entropy, entropy->next_restart_num);
-
-  Se = cinfo->Se;
-  Al = cinfo->Al;
-  natural_order = cinfo->natural_order;
-
-  /* Encode the MCU data block */
-  block = MCU_data[0];
-
-  /* It is convenient to make a pre-pass to determine the transformed
-   * coefficients' absolute values and the EOB position.
-   */
-  EOB = 0;
-  for (k = cinfo->Ss; k <= Se; k++) {
-    temp = (*block)[natural_order[k]];
-    /* We must apply the point transform by Al.  For AC coefficients this
-     * is an integer division with rounding towards 0.  To do this portably
-     * in C, we shift after obtaining the absolute value.
-     */
-    if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-    temp >>= Al;		/* apply the point transform */
-    absvalues[k] = temp;	/* save abs value for main pass */
-    if (temp == 1)
-      EOB = k;			/* EOB = index of last newly-nonzero coef */
-  }
-
-  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
-  
-  r = 0;			/* r = run length of zeros */
-  BR = 0;			/* BR = count of buffered bits added now */
-  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
-
-  for (k = cinfo->Ss; k <= Se; k++) {
-    if ((temp = absvalues[k]) == 0) {
-      r++;
-      continue;
-    }
-
-    /* Emit any required ZRLs, but not if they can be folded into EOB */
-    while (r > 15 && k <= EOB) {
-      /* emit any pending EOBRUN and the BE correction bits */
-      emit_eobrun(entropy);
-      /* Emit ZRL */
-      emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0);
-      r -= 16;
-      /* Emit buffered correction bits that must be associated with ZRL */
-      emit_buffered_bits(entropy, BR_buffer, BR);
-      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
-      BR = 0;
-    }
-
-    /* If the coef was previously nonzero, it only needs a correction bit.
-     * NOTE: a straight translation of the spec's figure G.7 would suggest
-     * that we also need to test r > 15.  But if r > 15, we can only get here
-     * if k > EOB, which implies that this coefficient is not 1.
-     */
-    if (temp > 1) {
-      /* The correction bit is the next bit of the absolute value. */
-      BR_buffer[BR++] = (char) (temp & 1);
-      continue;
-    }
-
-    /* Emit any pending EOBRUN and the BE correction bits */
-    emit_eobrun(entropy);
-
-    /* Count/emit Huffman symbol for run length / number of bits */
-    emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
-
-    /* Emit output bit for newly-nonzero coef */
-    temp = ((*block)[natural_order[k]] < 0) ? 0 : 1;
-    emit_bits_e(entropy, (unsigned int) temp, 1);
-
-    /* Emit buffered correction bits that must be associated with this code */
-    emit_buffered_bits(entropy, BR_buffer, BR);
-    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
-    BR = 0;
-    r = 0;			/* reset zero run length */
-  }
-
-  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    entropy->BE += BR;		/* concat my correction bits to older ones */
-    /* We force out the EOB if we risk either:
-     * 1. overflow of the EOB counter;
-     * 2. overflow of the correction bit buffer during the next MCU.
-     */
-    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
-      emit_eobrun(entropy);
-  }
-
-  cinfo->dest->next_output_byte = entropy->next_output_byte;
-  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/* Encode a single block's worth of coefficients */
-
-LOCAL(boolean)
-encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
-		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
-{
-  register int temp, temp2;
-  register int nbits;
-  register int r, k;
-  int Se = state->cinfo->lim_Se;
-  int max_coef_bits = state->cinfo->data_precision + 3;
-  const int * natural_order = state->cinfo->natural_order;
-
-  /* Encode the DC coefficient difference per section F.1.2.1 */
-
-  if ((temp = block[0] - last_dc_val) == 0) {
-    /* Emit the Huffman-coded symbol for the number of bits */
-    if (! emit_bits_s(state, dctbl->ehufco[0], dctbl->ehufsi[0]))
-      return FALSE;
-  } else {
-    if ((temp2 = temp) < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* For a negative input, want temp2 = bitwise complement of abs(input) */
-      /* This code assumes we are on a two's complement machine */
-      temp2--;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Since we're encoding a difference, the range limit is twice as much.
-     */
-    if (nbits > max_coef_bits)
-      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-
-    /* Emit the Huffman-coded symbol for the number of bits */
-    if (! emit_bits_s(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
-      return FALSE;
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
-      return FALSE;
-  }
-
-  /* Encode the AC coefficients per section F.1.2.2 */
-
-  r = 0;			/* r = run length of zeros */
-
-  for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
-      r++;
-      continue;
-    }
-
-    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-    while (r > 15) {
-      if (! emit_bits_s(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
-	return FALSE;
-      r -= 16;
-    }
-
-    if ((temp2 = temp) < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
-      /* This code assumes we are on a two's complement machine */
-      temp2--;
-    }
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Use ">=" instead of ">" so can use the
-     * same one larger limit from DC check here.
-     */
-    if (nbits >= max_coef_bits)
-      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-
-    /* Emit Huffman symbol for run length / number of bits */
-    temp = (r << 4) + nbits;
-    if (! emit_bits_s(state, actbl->ehufco[temp], actbl->ehufsi[temp]))
-      return FALSE;
-
-    /* Emit that number of bits of the value, if positive, */
-    /* or the complement of its magnitude, if negative. */
-    if (! emit_bits_s(state, (unsigned int) temp2, nbits))
-      return FALSE;
-
-    r = 0;			/* reset zero run length */
-  }
-
-  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    if (! emit_bits_s(state, actbl->ehufco[0], actbl->ehufsi[0]))
-      return FALSE;
-
-  return TRUE;
-}
-
-
-/*
- * Encode and output one MCU's worth of Huffman-compressed coefficients.
- */
-
-METHODDEF(boolean)
-encode_mcu_huff (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  working_state state;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Load up working state */
-  state.next_output_byte = cinfo->dest->next_output_byte;
-  state.free_in_buffer = cinfo->dest->free_in_buffer;
-  ASSIGN_STATE(state.cur, entropy->saved);
-  state.cinfo = cinfo;
-
-  /* Emit restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! emit_restart_s(&state, entropy->next_restart_num))
-	return FALSE;
-  }
-
-  /* Encode the MCU data blocks */
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-    if (! encode_one_block(&state,
-			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
-			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
-			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
-      return FALSE;
-    /* Update last_dc_val */
-    state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
-  }
-
-  /* Completed MCU, so update state */
-  cinfo->dest->next_output_byte = state.next_output_byte;
-  cinfo->dest->free_in_buffer = state.free_in_buffer;
-  ASSIGN_STATE(entropy->saved, state.cur);
-
-  /* Update restart-interval state too */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      entropy->restarts_to_go = cinfo->restart_interval;
-      entropy->next_restart_num++;
-      entropy->next_restart_num &= 7;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Finish up at the end of a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass_huff (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  working_state state;
-
-  if (cinfo->progressive_mode) {
-    entropy->next_output_byte = cinfo->dest->next_output_byte;
-    entropy->free_in_buffer = cinfo->dest->free_in_buffer;
-
-    /* Flush out any buffered data */
-    emit_eobrun(entropy);
-    flush_bits_e(entropy);
-
-    cinfo->dest->next_output_byte = entropy->next_output_byte;
-    cinfo->dest->free_in_buffer = entropy->free_in_buffer;
-  } else {
-    /* Load up working state ... flush_bits needs it */
-    state.next_output_byte = cinfo->dest->next_output_byte;
-    state.free_in_buffer = cinfo->dest->free_in_buffer;
-    ASSIGN_STATE(state.cur, entropy->saved);
-    state.cinfo = cinfo;
-
-    /* Flush out the last data */
-    if (! flush_bits_s(&state))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-
-    /* Update state */
-    cinfo->dest->next_output_byte = state.next_output_byte;
-    cinfo->dest->free_in_buffer = state.free_in_buffer;
-    ASSIGN_STATE(entropy->saved, state.cur);
-  }
-}
-
-
-/*
- * Huffman coding optimization.
- *
- * We first scan the supplied data and count the number of uses of each symbol
- * that is to be Huffman-coded. (This process MUST agree with the code above.)
- * Then we build a Huffman coding tree for the observed counts.
- * Symbols which are not needed at all for the particular image are not
- * assigned any code, which saves space in the DHT marker as well as in
- * the compressed data.
- */
-
-
-/* Process a single block's worth of coefficients */
-
-LOCAL(void)
-htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
-		 long dc_counts[], long ac_counts[])
-{
-  register int temp;
-  register int nbits;
-  register int r, k;
-  int Se = cinfo->lim_Se;
-  int max_coef_bits = cinfo->data_precision + 3;
-  const int * natural_order = cinfo->natural_order;
-
-  /* Encode the DC coefficient difference per section F.1.2.1 */
-
-  if ((temp = block[0] - last_dc_val) == 0) {
-    /* Count the Huffman symbol for the number of bits */
-    dc_counts[0]++;
-  } else {
-    if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Since we're encoding a difference, the range limit is twice as much.
-     */
-    if (nbits > max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count the Huffman symbol for the number of bits */
-    dc_counts[nbits]++;
-  }
-
-  /* Encode the AC coefficients per section F.1.2.2 */
-
-  r = 0;			/* r = run length of zeros */
-
-  for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
-      r++;
-      continue;
-    }
-
-    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-    while (r > 15) {
-      ac_counts[0xF0]++;
-      r -= 16;
-    }
-
-    if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-
-    /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 0;
-    do nbits++;			/* there must be at least one 1 bit */
-    while ((temp >>= 1));
-    /* Check for out-of-range coefficient values.
-     * Use ">=" instead of ">" so can use the
-     * same one larger limit from DC check here.
-     */
-    if (nbits >= max_coef_bits)
-      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-
-    /* Count Huffman symbol for run length / number of bits */
-    ac_counts[(r << 4) + nbits]++;
-
-    r = 0;			/* reset zero run length */
-  }
-
-  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    ac_counts[0]++;
-}
-
-
-/*
- * Trial-encode one MCU's worth of Huffman-compressed coefficients.
- * No data is actually output, so no suspension return is possible.
- */
-
-METHODDEF(boolean)
-encode_mcu_gather (j_compress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn, ci;
-  jpeg_component_info * compptr;
-
-  /* Take care of restart intervals if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0) {
-      /* Re-initialize DC predictions to 0 */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-	entropy->saved.last_dc_val[ci] = 0;
-      /* Update restart state */
-      entropy->restarts_to_go = cinfo->restart_interval;
-    }
-    entropy->restarts_to_go--;
-  }
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
-		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
-		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
-    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Generate the best Huffman code table for the given counts, fill htbl.
- *
- * The JPEG standard requires that no symbol be assigned a codeword of all
- * one bits (so that padding bits added at the end of a compressed segment
- * can't look like a valid code).  Because of the canonical ordering of
- * codewords, this just means that there must be an unused slot in the
- * longest codeword length category.  Section K.2 of the JPEG spec suggests
- * reserving such a slot by pretending that symbol 256 is a valid symbol
- * with count 1.  In theory that's not optimal; giving it count zero but
- * including it in the symbol set anyway should give a better Huffman code.
- * But the theoretically better code actually seems to come out worse in
- * practice, because it produces more all-ones bytes (which incur stuffed
- * zero bytes in the final file).  In any case the difference is tiny.
- *
- * The JPEG standard requires Huffman codes to be no more than 16 bits long.
- * If some symbols have a very small but nonzero probability, the Huffman tree
- * must be adjusted to meet the code length restriction.  We currently use
- * the adjustment method suggested in JPEG section K.2.  This method is *not*
- * optimal; it may not choose the best possible limited-length code.  But
- * typically only very-low-frequency symbols will be given less-than-optimal
- * lengths, so the code is almost optimal.  Experimental comparisons against
- * an optimal limited-length-code algorithm indicate that the difference is
- * microscopic --- usually less than a hundredth of a percent of total size.
- * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
- */
-
-LOCAL(void)
-jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
-{
-#define MAX_CLEN 32		/* assumed maximum initial code length */
-  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
-  int codesize[257];		/* codesize[k] = code length of symbol k */
-  int others[257];		/* next symbol in current branch of tree */
-  int c1, c2, i, j;
-  UINT8 *p;
-  long v;
-
-  freq[256] = 1;		/* make sure 256 has a nonzero count */
-  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
-   * that no real symbol is given code-value of all ones, because 256
-   * will be placed last in the largest codeword category.
-   * In the symbol list build procedure this element serves as sentinel
-   * for the zero run loop.
-   */
-
-#ifndef DONT_USE_FANCY_HUFF_OPT
-
-  /* Build list of symbols sorted in order of descending frequency */
-  /* This approach has several benefits (thank to John Korejwa for the idea):
-   *     1.
-   * If a codelength category is split during the length limiting procedure
-   * below, the feature that more frequent symbols are assigned shorter
-   * codewords remains valid for the adjusted code.
-   *     2.
-   * To reduce consecutive ones in a Huffman data stream (thus reducing the
-   * number of stuff bytes in JPEG) it is preferable to follow 0 branches
-   * (and avoid 1 branches) as much as possible.  This is easily done by
-   * assigning symbols to leaves of the Huffman tree in order of decreasing
-   * frequency, with no secondary sort based on codelengths.
-   *     3.
-   * The symbol list can be built independently from the assignment of code
-   * lengths by the Huffman procedure below.
-   * Note: The symbol list build procedure must be performed first, because
-   * the Huffman procedure assigning the codelengths clobbers the frequency
-   * counts!
-   */
-
-  /* Here we use the others array as a linked list of nonzero frequencies
-   * to be sorted.  Already sorted elements are removed from the list.
-   */
-
-  /* Building list */
-
-  /* This item does not correspond to a valid symbol frequency and is used
-   * as starting index.
-   */
-  j = 256;
-
-  for (i = 0;; i++) {
-    if (freq[i] == 0)		/* skip zero frequencies */
-      continue;
-    if (i > 255)
-      break;
-    others[j] = i;		/* this symbol value */
-    j = i;			/* previous symbol value */
-  }
-  others[j] = -1;		/* mark end of list */
-
-  /* Sorting list */
-
-  p = htbl->huffval;
-  while ((c1 = others[256]) >= 0) {
-    v = freq[c1];
-    i = c1;			/* first symbol value */
-    j = 256;			/* pseudo symbol value for starting index */
-    while ((c2 = others[c1]) >= 0) {
-      if (freq[c2] > v) {
-	v = freq[c2];
-	i = c2;			/* this symbol value */
-	j = c1;			/* previous symbol value */
-      }
-      c1 = c2;
-    }
-    others[j] = others[i];	/* remove this symbol i from list */
-    *p++ = (UINT8) i;
-  }
-
-#endif /* DONT_USE_FANCY_HUFF_OPT */
-
-  /* This algorithm is explained in section K.2 of the JPEG standard */
-
-  MEMZERO(bits, SIZEOF(bits));
-  MEMZERO(codesize, SIZEOF(codesize));
-  for (i = 0; i < 257; i++)
-    others[i] = -1;		/* init links to empty */
-
-  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
-
-  for (;;) {
-    /* Find the smallest nonzero frequency, set c1 = its symbol */
-    /* In case of ties, take the larger symbol number */
-    c1 = -1;
-    v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v) {
-	v = freq[i];
-	c1 = i;
-      }
-    }
-
-    /* Find the next smallest nonzero frequency, set c2 = its symbol */
-    /* In case of ties, take the larger symbol number */
-    c2 = -1;
-    v = 1000000000L;
-    for (i = 0; i <= 256; i++) {
-      if (freq[i] && freq[i] <= v && i != c1) {
-	v = freq[i];
-	c2 = i;
-      }
-    }
-
-    /* Done if we've merged everything into one frequency */
-    if (c2 < 0)
-      break;
-
-    /* Else merge the two counts/trees */
-    freq[c1] += freq[c2];
-    freq[c2] = 0;
-
-    /* Increment the codesize of everything in c1's tree branch */
-    codesize[c1]++;
-    while (others[c1] >= 0) {
-      c1 = others[c1];
-      codesize[c1]++;
-    }
-
-    others[c1] = c2;		/* chain c2 onto c1's tree branch */
-
-    /* Increment the codesize of everything in c2's tree branch */
-    codesize[c2]++;
-    while (others[c2] >= 0) {
-      c2 = others[c2];
-      codesize[c2]++;
-    }
-  }
-
-  /* Now count the number of symbols of each code length */
-  for (i = 0; i <= 256; i++) {
-    if (codesize[i]) {
-      /* The JPEG standard seems to think that this can't happen, */
-      /* but I'm paranoid... */
-      if (codesize[i] > MAX_CLEN)
-	ERREXIT(cinfo, JERR_HUFF_CLEN_OUTOFBOUNDS);
-
-      bits[codesize[i]]++;
-    }
-  }
-
-  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
-   * Huffman procedure assigned any such lengths, we must adjust the coding.
-   * Here is what the JPEG spec says about how this next bit works:
-   * Since symbols are paired for the longest Huffman code, the symbols are
-   * removed from this length category two at a time.  The prefix for the pair
-   * (which is one bit shorter) is allocated to one of the pair; then,
-   * skipping the BITS entry for that prefix length, a code word from the next
-   * shortest nonzero BITS entry is converted into a prefix for two code words
-   * one bit longer.
-   */
-
-  for (i = MAX_CLEN; i > 16; i--) {
-    while (bits[i] > 0) {
-      j = i - 2;		/* find length of new prefix to be used */
-      while (bits[j] == 0) {
-	if (j == 0)
-	  ERREXIT(cinfo, JERR_HUFF_CLEN_OUTOFBOUNDS);
-	j--;
-      }
-
-      bits[i] -= 2;		/* remove two symbols */
-      bits[i-1]++;		/* one goes in this length */
-      bits[j+1] += 2;		/* two new symbols in this length */
-      bits[j]--;		/* symbol of this length is now a prefix */
-    }
-  }
-
-  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
-  while (bits[i] == 0)		/* find largest codelength still in use */
-    i--;
-  bits[i]--;
-
-  /* Return final symbol counts (only for lengths 0..16) */
-  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
-
-#ifdef DONT_USE_FANCY_HUFF_OPT
-
-  /* Return a list of the symbols sorted by code length */
-  /* Note: Due to the codelength changes made above, it can happen
-   * that more frequent symbols are assigned longer codewords.
-   */
-  p = htbl->huffval;
-  for (i = 1; i <= MAX_CLEN; i++) {
-    for (j = 0; j <= 255; j++) {
-      if (codesize[j] == i) {
-	*p++ = (UINT8) j;
-      }
-    }
-  }
-
-#endif /* DONT_USE_FANCY_HUFF_OPT */
-
-  /* Set sent_table FALSE so updated table will be written to JPEG file. */
-  htbl->sent_table = FALSE;
-}
-
-
-/*
- * Finish up a statistics-gathering pass and create the new Huffman tables.
- */
-
-METHODDEF(void)
-finish_pass_gather (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-  JHUFF_TBL **htblptr;
-  boolean did_dc[NUM_HUFF_TBLS];
-  boolean did_ac[NUM_HUFF_TBLS];
-
-  if (cinfo->progressive_mode)
-    /* Flush out buffered data (all we care about is counting the EOB symbol) */
-    emit_eobrun(entropy);
-
-  /* It's important not to apply jpeg_gen_optimal_table more than once
-   * per table, because it clobbers the input frequency counts!
-   */
-  MEMZERO(did_dc, SIZEOF(did_dc));
-  MEMZERO(did_ac, SIZEOF(did_ac));
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (! did_dc[tbl]) {
-	htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
-	if (*htblptr == NULL)
-	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[tbl]);
-	did_dc[tbl] = TRUE;
-      }
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (! did_ac[tbl]) {
-	htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
-	if (*htblptr == NULL)
-	  *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-	jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[tbl]);
-	did_ac[tbl] = TRUE;
-      }
-    }
-  }
-}
-
-
-/*
- * Initialize for a Huffman-compressed scan.
- * If gather_statistics is TRUE, we do not output anything during the scan,
- * just count the Huffman symbols used and generate Huffman code tables.
- */
-
-METHODDEF(void)
-start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (gather_statistics)
-    entropy->pub.finish_pass = finish_pass_gather;
-  else
-    entropy->pub.finish_pass = finish_pass_huff;
-
-  if (cinfo->progressive_mode) {
-    entropy->cinfo = cinfo;
-    entropy->gather_statistics = gather_statistics;
-
-    /* We assume jcmaster.c already validated the scan parameters. */
-
-    /* Select execution routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_first;
-      else
-	entropy->pub.encode_mcu = encode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.encode_mcu = encode_mcu_DC_refine;
-      else {
-	entropy->pub.encode_mcu = encode_mcu_AC_refine;
-	/* AC refinement needs a correction bit buffer */
-	if (entropy->bit_buffer == NULL)
-	  entropy->bit_buffer = (char *) (*cinfo->mem->alloc_small)
-	    ((j_common_ptr) cinfo, JPOOL_IMAGE, MAX_CORR_BITS * SIZEOF(char));
-      }
-    }
-
-    /* Initialize AC stuff */
-    entropy->ac_tbl_no = cinfo->cur_comp_info[0]->ac_tbl_no;
-    entropy->EOBRUN = 0;
-    entropy->BE = 0;
-  } else {
-    if (gather_statistics)
-      entropy->pub.encode_mcu = encode_mcu_gather;
-    else
-      entropy->pub.encode_mcu = encode_mcu_huff;
-  }
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0) {
-      tbl = compptr->dc_tbl_no;
-      if (gather_statistics) {
-	/* Check for invalid table index */
-	/* (make_c_derived_tbl does this in the other path) */
-	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
-	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
-	/* Allocate and zero the statistics tables */
-	/* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
-	if (entropy->dc_count_ptrs[tbl] == NULL)
-	  entropy->dc_count_ptrs[tbl] = (long *) (*cinfo->mem->alloc_small)
-	    ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long));
-	MEMZERO(entropy->dc_count_ptrs[tbl], 257 * SIZEOF(long));
-      } else {
-	/* Compute derived values for Huffman tables */
-	/* We may do this more than once for a table, but it's not expensive */
-	jpeg_make_c_derived_tbl(cinfo, TRUE, tbl,
-				& entropy->dc_derived_tbls[tbl]);
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-    /* AC needs no table when not present */
-    if (cinfo->Se) {
-      tbl = compptr->ac_tbl_no;
-      if (gather_statistics) {
-	if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
-	  ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
-	if (entropy->ac_count_ptrs[tbl] == NULL)
-	  entropy->ac_count_ptrs[tbl] = (long *) (*cinfo->mem->alloc_small)
-	    ((j_common_ptr) cinfo, JPOOL_IMAGE, 257 * SIZEOF(long));
-	MEMZERO(entropy->ac_count_ptrs[tbl], 257 * SIZEOF(long));
-      } else {
-	jpeg_make_c_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->ac_derived_tbls[tbl]);
-      }
-    }
-  }
-
-  /* Initialize bit buffer to empty */
-  entropy->saved.put_buffer = 0;
-  entropy->saved.put_bits = 0;
-
-  /* Initialize restart stuff */
-  entropy->restarts_to_go = cinfo->restart_interval;
-  entropy->next_restart_num = 0;
-}
-
-
-/*
- * Module initialization routine for Huffman entropy encoding.
- */
-
-GLOBAL(void)
-jinit_huff_encoder (j_compress_ptr cinfo)
-{
-  huff_entropy_ptr entropy;
-  int i;
-
-  entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(huff_entropy_encoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass_huff;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
-    entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
-  }
-
-  if (cinfo->progressive_mode)
-    entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
-}
diff --git a/cmake/externals/libjpeg/jcinit.c b/cmake/externals/libjpeg/jcinit.c
deleted file mode 100644
index 2aea7ca2a..000000000
--- a/cmake/externals/libjpeg/jcinit.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * jcinit.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2003-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains initialization logic for the JPEG compressor.
- * This routine is in charge of selecting the modules to be executed and
- * making an initialization call to each one.
- *
- * Logically, this code belongs in jcmaster.c.  It's split out because
- * linking this routine implies linking the entire compression library.
- * For a transcoding-only application, we want to be able to use jcmaster.c
- * without linking in the whole library.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Compute JPEG image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- */
-
-GLOBAL(void)
-jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
-/* Do computations that are needed before master selection phase */
-{
-  /* Sanity check on input image dimensions to prevent overflow in
-   * following calculations.
-   * We do check jpeg_width and jpeg_height in initial_setup in jcmaster.c,
-   * but image_width and image_height can come from arbitrary data,
-   * and we need some space for multiplication by block_size.
-   */
-  if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24))
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-#ifdef DCT_SCALING_SUPPORTED
-
-  /* Compute actual JPEG image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/1 scaling */
-    cinfo->jpeg_width = cinfo->image_width * cinfo->block_size;
-    cinfo->jpeg_height = cinfo->image_height * cinfo->block_size;
-    cinfo->min_DCT_h_scaled_size = 1;
-    cinfo->min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/2 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L);
-    cinfo->min_DCT_h_scaled_size = 2;
-    cinfo->min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/3 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L);
-    cinfo->min_DCT_h_scaled_size = 3;
-    cinfo->min_DCT_v_scaled_size = 3;
-  } else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/4 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L);
-    cinfo->min_DCT_h_scaled_size = 4;
-    cinfo->min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/5 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L);
-    cinfo->min_DCT_h_scaled_size = 5;
-    cinfo->min_DCT_v_scaled_size = 5;
-  } else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/6 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L);
-    cinfo->min_DCT_h_scaled_size = 6;
-    cinfo->min_DCT_v_scaled_size = 6;
-  } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/7 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L);
-    cinfo->min_DCT_h_scaled_size = 7;
-    cinfo->min_DCT_v_scaled_size = 7;
-  } else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/8 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L);
-    cinfo->min_DCT_h_scaled_size = 8;
-    cinfo->min_DCT_v_scaled_size = 8;
-  } else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/9 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L);
-    cinfo->min_DCT_h_scaled_size = 9;
-    cinfo->min_DCT_v_scaled_size = 9;
-  } else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/10 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L);
-    cinfo->min_DCT_h_scaled_size = 10;
-    cinfo->min_DCT_v_scaled_size = 10;
-  } else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/11 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L);
-    cinfo->min_DCT_h_scaled_size = 11;
-    cinfo->min_DCT_v_scaled_size = 11;
-  } else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/12 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L);
-    cinfo->min_DCT_h_scaled_size = 12;
-    cinfo->min_DCT_v_scaled_size = 12;
-  } else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/13 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L);
-    cinfo->min_DCT_h_scaled_size = 13;
-    cinfo->min_DCT_v_scaled_size = 13;
-  } else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/14 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L);
-    cinfo->min_DCT_h_scaled_size = 14;
-    cinfo->min_DCT_v_scaled_size = 14;
-  } else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) {
-    /* Provide block_size/15 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L);
-    cinfo->min_DCT_h_scaled_size = 15;
-    cinfo->min_DCT_v_scaled_size = 15;
-  } else {
-    /* Provide block_size/16 scaling */
-    cinfo->jpeg_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L);
-    cinfo->jpeg_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L);
-    cinfo->min_DCT_h_scaled_size = 16;
-    cinfo->min_DCT_v_scaled_size = 16;
-  }
-
-#else /* !DCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->jpeg_width = cinfo->image_width;
-  cinfo->jpeg_height = cinfo->image_height;
-  cinfo->min_DCT_h_scaled_size = DCTSIZE;
-  cinfo->min_DCT_v_scaled_size = DCTSIZE;
-
-#endif /* DCT_SCALING_SUPPORTED */
-}
-
-
-/*
- * Master selection of compression modules.
- * This is done once at the start of processing an image.  We determine
- * which modules will be used and give them appropriate initialization calls.
- */
-
-GLOBAL(void)
-jinit_compress_master (j_compress_ptr cinfo)
-{
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Sanity check on input image dimensions */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
-      cinfo->input_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  /* Width of an input scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* Compute JPEG image dimensions and related values. */
-  jpeg_calc_jpeg_dimensions(cinfo);
-
-  /* Initialize master control (includes parameter checking/processing) */
-  jinit_c_master_control(cinfo, FALSE /* full compression */);
-
-  /* Preprocessing */
-  if (! cinfo->raw_data_in) {
-    jinit_color_converter(cinfo);
-    jinit_downsampler(cinfo);
-    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
-  }
-  /* Forward DCT */
-  jinit_forward_dct(cinfo);
-  /* Entropy encoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_encoder(cinfo);
-  else {
-    jinit_huff_encoder(cinfo);
-  }
-
-  /* Need a full-image coefficient buffer in any multi-pass mode. */
-  jinit_c_coef_controller(cinfo,
-		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
-  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
-
-  jinit_marker_writer(cinfo);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Write the datastream header (SOI) immediately.
-   * Frame and scan headers are postponed till later.
-   * This lets application insert special markers after the SOI.
-   */
-  (*cinfo->marker->write_file_header) (cinfo);
-}
diff --git a/cmake/externals/libjpeg/jcmainct.c b/cmake/externals/libjpeg/jcmainct.c
deleted file mode 100644
index 39b97902e..000000000
--- a/cmake/externals/libjpeg/jcmainct.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * jcmainct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2012 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the main buffer controller for compression.
- * The main buffer lies between the pre-processor and the JPEG
- * compressor proper; it holds downsampled data in the JPEG colorspace.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Note: currently, there is no operating mode in which a full-image buffer
- * is needed at this step.  If there were, that mode could not be used with
- * "raw data" input, since this module is bypassed in that case.  However,
- * we've left the code here for possible use in special applications.
- */
-#undef FULL_MAIN_BUFFER_SUPPORTED
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_main_controller pub; /* public fields */
-
-  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
-  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
-  boolean suspended;		/* remember if we suspended output */
-  J_BUF_MODE pass_mode;		/* current operating mode */
-
-  /* If using just a strip buffer, this points to the entire set of buffers
-   * (we allocate one for each component).  In the full-image case, this
-   * points to the currently accessible strips of the virtual arrays.
-   */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
-
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-  /* If using full-image storage, this array holds pointers to virtual-array
-   * control blocks for each component.  Unused if not full-image storage.
-   */
-  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
-#endif
-} my_main_controller;
-
-typedef my_main_controller * my_main_ptr;
-
-
-/* Forward declarations */
-METHODDEF(void) process_data_simple_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-METHODDEF(void) process_data_buffer_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
-#endif
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  /* Do nothing in raw-data mode. */
-  if (cinfo->raw_data_in)
-    return;
-
-  mainp->cur_iMCU_row = 0;	/* initialize counters */
-  mainp->rowgroup_ctr = 0;
-  mainp->suspended = FALSE;
-  mainp->pass_mode = pass_mode;	/* save mode for use by process_data */
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    if (mainp->whole_image[0] != NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-    mainp->pub.process_data = process_data_simple_main;
-    break;
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-  case JBUF_SAVE_SOURCE:
-  case JBUF_CRANK_DEST:
-  case JBUF_SAVE_AND_PASS:
-    if (mainp->whole_image[0] == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    mainp->pub.process_data = process_data_buffer_main;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-}
-
-
-/*
- * Process some data.
- * This routine handles the simple pass-through mode,
- * where we have only a strip buffer.
- */
-
-METHODDEF(void)
-process_data_simple_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
-    /* Read input data if we haven't filled the main buffer yet */
-    if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-      (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					mainp->buffer, &mainp->rowgroup_ctr,
-					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
-
-    /* If we don't have a full iMCU row buffered, return to application for
-     * more data.  Note that preprocessor will always pad to fill the iMCU row
-     * at the bottom of the image.
-     */
-    if (mainp->rowgroup_ctr != (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-      return;
-
-    /* Send the completed row to the compressor */
-    if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
-      /* If compressor did not consume the whole row, then we must need to
-       * suspend processing and return to the application.  In this situation
-       * we pretend we didn't yet consume the last input row; otherwise, if
-       * it happened to be the last row of the image, the application would
-       * think we were done.
-       */
-      if (! mainp->suspended) {
-	(*in_row_ctr)--;
-	mainp->suspended = TRUE;
-      }
-      return;
-    }
-    /* We did finish the row.  Undo our little suspension hack if a previous
-     * call suspended; then mark the main buffer empty.
-     */
-    if (mainp->suspended) {
-      (*in_row_ctr)++;
-      mainp->suspended = FALSE;
-    }
-    mainp->rowgroup_ctr = 0;
-    mainp->cur_iMCU_row++;
-  }
-}
-
-
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-
-/*
- * Process some data.
- * This routine handles all of the modes that use a full-size buffer.
- */
-
-METHODDEF(void)
-process_data_buffer_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci;
-  jpeg_component_info *compptr;
-  boolean writing = (mainp->pass_mode != JBUF_CRANK_DEST);
-
-  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
-    /* Realign the virtual buffers if at the start of an iMCU row. */
-    if (mainp->rowgroup_ctr == 0) {
-      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	mainp->buffer[ci] = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, mainp->whole_image[ci], mainp->cur_iMCU_row *
-	   ((JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size)),
-	   (JDIMENSION) (compptr->v_samp_factor * cinfo->min_DCT_v_scaled_size),
-	   writing);
-      }
-      /* In a read pass, pretend we just read some source data. */
-      if (! writing) {
-	*in_row_ctr += (JDIMENSION)
-	  (cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size);
-	mainp->rowgroup_ctr = (JDIMENSION) cinfo->min_DCT_v_scaled_size;
-      }
-    }
-
-    /* If a write pass, read input data until the current iMCU row is full. */
-    /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
-    if (writing) {
-      (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					mainp->buffer, &mainp->rowgroup_ctr,
-					(JDIMENSION) cinfo->min_DCT_v_scaled_size);
-      /* Return to application if we need more data to fill the iMCU row. */
-      if (mainp->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size)
-	return;
-    }
-
-    /* Emit data, unless this is a sink-only pass. */
-    if (mainp->pass_mode != JBUF_SAVE_SOURCE) {
-      if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
-	/* If compressor did not consume the whole row, then we must need to
-	 * suspend processing and return to the application.  In this situation
-	 * we pretend we didn't yet consume the last input row; otherwise, if
-	 * it happened to be the last row of the image, the application would
-	 * think we were done.
-	 */
-	if (! mainp->suspended) {
-	  (*in_row_ctr)--;
-	  mainp->suspended = TRUE;
-	}
-	return;
-      }
-      /* We did finish the row.  Undo our little suspension hack if a previous
-       * call suspended; then mark the main buffer empty.
-       */
-      if (mainp->suspended) {
-	(*in_row_ctr)++;
-	mainp->suspended = FALSE;
-      }
-    }
-
-    /* If get here, we are done with this iMCU row.  Mark buffer empty. */
-    mainp->rowgroup_ctr = 0;
-    mainp->cur_iMCU_row++;
-  }
-}
-
-#endif /* FULL_MAIN_BUFFER_SUPPORTED */
-
-
-/*
- * Initialize main buffer controller.
- */
-
-GLOBAL(void)
-jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_main_ptr mainp;
-  int ci;
-  jpeg_component_info *compptr;
-
-  mainp = (my_main_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
-  cinfo->main = &mainp->pub;
-  mainp->pub.start_pass = start_pass_main;
-
-  /* We don't need to create a buffer in raw-data mode. */
-  if (cinfo->raw_data_in)
-    return;
-
-  /* Create the buffer.  It holds downsampled data, so each component
-   * may be of a different size.
-   */
-  if (need_full_buffer) {
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    /* Allocate a full-image virtual array for each component */
-    /* Note we pad the bottom to a multiple of the iMCU height */
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      mainp->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
-	 ((JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				 (long) compptr->v_samp_factor)) *
-	 ((JDIMENSION) cinfo->min_DCT_v_scaled_size),
-	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
-    }
-#else
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif
-  } else {
-#ifdef FULL_MAIN_BUFFER_SUPPORTED
-    mainp->whole_image[0] = NULL; /* flag for no virtual arrays */
-#endif
-    /* Allocate a strip buffer for each component */
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
-	 (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size));
-    }
-  }
-}
diff --git a/cmake/externals/libjpeg/jcmarker.c b/cmake/externals/libjpeg/jcmarker.c
deleted file mode 100644
index 8874cd867..000000000
--- a/cmake/externals/libjpeg/jcmarker.c
+++ /dev/null
@@ -1,717 +0,0 @@
-/*
- * jcmarker.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains routines to write JPEG datastream markers.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-typedef enum {			/* JPEG marker codes */
-  M_SOF0  = 0xc0,
-  M_SOF1  = 0xc1,
-  M_SOF2  = 0xc2,
-  M_SOF3  = 0xc3,
-
-  M_SOF5  = 0xc5,
-  M_SOF6  = 0xc6,
-  M_SOF7  = 0xc7,
-
-  M_JPG   = 0xc8,
-  M_SOF9  = 0xc9,
-  M_SOF10 = 0xca,
-  M_SOF11 = 0xcb,
-
-  M_SOF13 = 0xcd,
-  M_SOF14 = 0xce,
-  M_SOF15 = 0xcf,
-
-  M_DHT   = 0xc4,
-
-  M_DAC   = 0xcc,
-
-  M_RST0  = 0xd0,
-  M_RST1  = 0xd1,
-  M_RST2  = 0xd2,
-  M_RST3  = 0xd3,
-  M_RST4  = 0xd4,
-  M_RST5  = 0xd5,
-  M_RST6  = 0xd6,
-  M_RST7  = 0xd7,
-
-  M_SOI   = 0xd8,
-  M_EOI   = 0xd9,
-  M_SOS   = 0xda,
-  M_DQT   = 0xdb,
-  M_DNL   = 0xdc,
-  M_DRI   = 0xdd,
-  M_DHP   = 0xde,
-  M_EXP   = 0xdf,
-
-  M_APP0  = 0xe0,
-  M_APP1  = 0xe1,
-  M_APP2  = 0xe2,
-  M_APP3  = 0xe3,
-  M_APP4  = 0xe4,
-  M_APP5  = 0xe5,
-  M_APP6  = 0xe6,
-  M_APP7  = 0xe7,
-  M_APP8  = 0xe8,
-  M_APP9  = 0xe9,
-  M_APP10 = 0xea,
-  M_APP11 = 0xeb,
-  M_APP12 = 0xec,
-  M_APP13 = 0xed,
-  M_APP14 = 0xee,
-  M_APP15 = 0xef,
-
-  M_JPG0  = 0xf0,
-  M_JPG8  = 0xf8,
-  M_JPG13 = 0xfd,
-  M_COM   = 0xfe,
-
-  M_TEM   = 0x01,
-
-  M_ERROR = 0x100
-} JPEG_MARKER;
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_marker_writer pub; /* public fields */
-
-  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
-} my_marker_writer;
-
-typedef my_marker_writer * my_marker_ptr;
-
-
-/*
- * Basic output routines.
- *
- * Note that we do not support suspension while writing a marker.
- * Therefore, an application using suspension must ensure that there is
- * enough buffer space for the initial markers (typ. 600-700 bytes) before
- * calling jpeg_start_compress, and enough space to write the trailing EOI
- * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
- * modes are not supported at all with suspension, so those two are the only
- * points where markers will be written.
- */
-
-LOCAL(void)
-emit_byte (j_compress_ptr cinfo, int val)
-/* Emit a byte */
-{
-  struct jpeg_destination_mgr * dest = cinfo->dest;
-
-  *(dest->next_output_byte)++ = (JOCTET) val;
-  if (--dest->free_in_buffer == 0) {
-    if (! (*dest->empty_output_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-  }
-}
-
-
-LOCAL(void)
-emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
-/* Emit a marker code */
-{
-  emit_byte(cinfo, 0xFF);
-  emit_byte(cinfo, (int) mark);
-}
-
-
-LOCAL(void)
-emit_2bytes (j_compress_ptr cinfo, int value)
-/* Emit a 2-byte integer; these are always MSB first in JPEG files */
-{
-  emit_byte(cinfo, (value >> 8) & 0xFF);
-  emit_byte(cinfo, value & 0xFF);
-}
-
-
-/*
- * Routines to write specific marker types.
- */
-
-LOCAL(int)
-emit_dqt (j_compress_ptr cinfo, int index)
-/* Emit a DQT marker */
-/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
-{
-  JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
-  int prec;
-  int i;
-
-  if (qtbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
-
-  prec = 0;
-  for (i = 0; i <= cinfo->lim_Se; i++) {
-    if (qtbl->quantval[cinfo->natural_order[i]] > 255)
-      prec = 1;
-  }
-
-  if (! qtbl->sent_table) {
-    emit_marker(cinfo, M_DQT);
-
-    emit_2bytes(cinfo,
-      prec ? cinfo->lim_Se * 2 + 2 + 1 + 2 : cinfo->lim_Se + 1 + 1 + 2);
-
-    emit_byte(cinfo, index + (prec<<4));
-
-    for (i = 0; i <= cinfo->lim_Se; i++) {
-      /* The table entries must be emitted in zigzag order. */
-      unsigned int qval = qtbl->quantval[cinfo->natural_order[i]];
-      if (prec)
-	emit_byte(cinfo, (int) (qval >> 8));
-      emit_byte(cinfo, (int) (qval & 0xFF));
-    }
-
-    qtbl->sent_table = TRUE;
-  }
-
-  return prec;
-}
-
-
-LOCAL(void)
-emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
-/* Emit a DHT marker */
-{
-  JHUFF_TBL * htbl;
-  int length, i;
-  
-  if (is_ac) {
-    htbl = cinfo->ac_huff_tbl_ptrs[index];
-    index += 0x10;		/* output index has AC bit set */
-  } else {
-    htbl = cinfo->dc_huff_tbl_ptrs[index];
-  }
-
-  if (htbl == NULL)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
-  
-  if (! htbl->sent_table) {
-    emit_marker(cinfo, M_DHT);
-    
-    length = 0;
-    for (i = 1; i <= 16; i++)
-      length += htbl->bits[i];
-    
-    emit_2bytes(cinfo, length + 2 + 1 + 16);
-    emit_byte(cinfo, index);
-    
-    for (i = 1; i <= 16; i++)
-      emit_byte(cinfo, htbl->bits[i]);
-    
-    for (i = 0; i < length; i++)
-      emit_byte(cinfo, htbl->huffval[i]);
-    
-    htbl->sent_table = TRUE;
-  }
-}
-
-
-LOCAL(void)
-emit_dac (j_compress_ptr cinfo)
-/* Emit a DAC marker */
-/* Since the useful info is so small, we want to emit all the tables in */
-/* one DAC marker.  Therefore this routine does its own scan of the table. */
-{
-#ifdef C_ARITH_CODING_SUPPORTED
-  char dc_in_use[NUM_ARITH_TBLS];
-  char ac_in_use[NUM_ARITH_TBLS];
-  int length, i;
-  jpeg_component_info *compptr;
-
-  for (i = 0; i < NUM_ARITH_TBLS; i++)
-    dc_in_use[i] = ac_in_use[i] = 0;
-
-  for (i = 0; i < cinfo->comps_in_scan; i++) {
-    compptr = cinfo->cur_comp_info[i];
-    /* DC needs no table for refinement scan */
-    if (cinfo->Ss == 0 && cinfo->Ah == 0)
-      dc_in_use[compptr->dc_tbl_no] = 1;
-    /* AC needs no table when not present */
-    if (cinfo->Se)
-      ac_in_use[compptr->ac_tbl_no] = 1;
-  }
-
-  length = 0;
-  for (i = 0; i < NUM_ARITH_TBLS; i++)
-    length += dc_in_use[i] + ac_in_use[i];
-
-  if (length) {
-    emit_marker(cinfo, M_DAC);
-
-    emit_2bytes(cinfo, length*2 + 2);
-
-    for (i = 0; i < NUM_ARITH_TBLS; i++) {
-      if (dc_in_use[i]) {
-	emit_byte(cinfo, i);
-	emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
-      }
-      if (ac_in_use[i]) {
-	emit_byte(cinfo, i + 0x10);
-	emit_byte(cinfo, cinfo->arith_ac_K[i]);
-      }
-    }
-  }
-#endif /* C_ARITH_CODING_SUPPORTED */
-}
-
-
-LOCAL(void)
-emit_dri (j_compress_ptr cinfo)
-/* Emit a DRI marker */
-{
-  emit_marker(cinfo, M_DRI);
-  
-  emit_2bytes(cinfo, 4);	/* fixed length */
-
-  emit_2bytes(cinfo, (int) cinfo->restart_interval);
-}
-
-
-LOCAL(void)
-emit_lse_ict (j_compress_ptr cinfo)
-/* Emit an LSE inverse color transform specification marker */
-{
-  /* Support only 1 transform */
-  if (cinfo->color_transform != JCT_SUBTRACT_GREEN ||
-      cinfo->num_components < 3)
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-
-  emit_marker(cinfo, M_JPG8);
-  
-  emit_2bytes(cinfo, 24);	/* fixed length */
-
-  emit_byte(cinfo, 0x0D);	/* ID inverse transform specification */
-  emit_2bytes(cinfo, MAXJSAMPLE);	/* MAXTRANS */
-  emit_byte(cinfo, 3);		/* Nt=3 */
-  emit_byte(cinfo, cinfo->comp_info[1].component_id);
-  emit_byte(cinfo, cinfo->comp_info[0].component_id);
-  emit_byte(cinfo, cinfo->comp_info[2].component_id);
-  emit_byte(cinfo, 0x80);	/* F1: CENTER1=1, NORM1=0 */
-  emit_2bytes(cinfo, 0);	/* A(1,1)=0 */
-  emit_2bytes(cinfo, 0);	/* A(1,2)=0 */
-  emit_byte(cinfo, 0);		/* F2: CENTER2=0, NORM2=0 */
-  emit_2bytes(cinfo, 1);	/* A(2,1)=1 */
-  emit_2bytes(cinfo, 0);	/* A(2,2)=0 */
-  emit_byte(cinfo, 0);		/* F3: CENTER3=0, NORM3=0 */
-  emit_2bytes(cinfo, 1);	/* A(3,1)=1 */
-  emit_2bytes(cinfo, 0);	/* A(3,2)=0 */
-}
-
-
-LOCAL(void)
-emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
-/* Emit a SOF marker */
-{
-  int ci;
-  jpeg_component_info *compptr;
-  
-  emit_marker(cinfo, code);
-  
-  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
-
-  /* Make sure image isn't bigger than SOF field can handle */
-  if ((long) cinfo->jpeg_height > 65535L ||
-      (long) cinfo->jpeg_width > 65535L)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
-
-  emit_byte(cinfo, cinfo->data_precision);
-  emit_2bytes(cinfo, (int) cinfo->jpeg_height);
-  emit_2bytes(cinfo, (int) cinfo->jpeg_width);
-
-  emit_byte(cinfo, cinfo->num_components);
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    emit_byte(cinfo, compptr->component_id);
-    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
-    emit_byte(cinfo, compptr->quant_tbl_no);
-  }
-}
-
-
-LOCAL(void)
-emit_sos (j_compress_ptr cinfo)
-/* Emit a SOS marker */
-{
-  int i, td, ta;
-  jpeg_component_info *compptr;
-  
-  emit_marker(cinfo, M_SOS);
-  
-  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
-  
-  emit_byte(cinfo, cinfo->comps_in_scan);
-  
-  for (i = 0; i < cinfo->comps_in_scan; i++) {
-    compptr = cinfo->cur_comp_info[i];
-    emit_byte(cinfo, compptr->component_id);
-
-    /* We emit 0 for unused field(s); this is recommended by the P&M text
-     * but does not seem to be specified in the standard.
-     */
-
-    /* DC needs no table for refinement scan */
-    td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
-    /* AC needs no table when not present */
-    ta = cinfo->Se ? compptr->ac_tbl_no : 0;
-
-    emit_byte(cinfo, (td << 4) + ta);
-  }
-
-  emit_byte(cinfo, cinfo->Ss);
-  emit_byte(cinfo, cinfo->Se);
-  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
-}
-
-
-LOCAL(void)
-emit_pseudo_sos (j_compress_ptr cinfo)
-/* Emit a pseudo SOS marker */
-{
-  emit_marker(cinfo, M_SOS);
-  
-  emit_2bytes(cinfo, 2 + 1 + 3); /* length */
-  
-  emit_byte(cinfo, 0); /* Ns */
-
-  emit_byte(cinfo, 0); /* Ss */
-  emit_byte(cinfo, cinfo->block_size * cinfo->block_size - 1); /* Se */
-  emit_byte(cinfo, 0); /* Ah/Al */
-}
-
-
-LOCAL(void)
-emit_jfif_app0 (j_compress_ptr cinfo)
-/* Emit a JFIF-compliant APP0 marker */
-{
-  /*
-   * Length of APP0 block	(2 bytes)
-   * Block ID			(4 bytes - ASCII "JFIF")
-   * Zero byte			(1 byte to terminate the ID string)
-   * Version Major, Minor	(2 bytes - major first)
-   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
-   * Xdpu			(2 bytes - dots per unit horizontal)
-   * Ydpu			(2 bytes - dots per unit vertical)
-   * Thumbnail X size		(1 byte)
-   * Thumbnail Y size		(1 byte)
-   */
-  
-  emit_marker(cinfo, M_APP0);
-  
-  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
-
-  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
-  emit_byte(cinfo, 0x46);
-  emit_byte(cinfo, 0x49);
-  emit_byte(cinfo, 0x46);
-  emit_byte(cinfo, 0);
-  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
-  emit_byte(cinfo, cinfo->JFIF_minor_version);
-  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
-  emit_2bytes(cinfo, (int) cinfo->X_density);
-  emit_2bytes(cinfo, (int) cinfo->Y_density);
-  emit_byte(cinfo, 0);		/* No thumbnail image */
-  emit_byte(cinfo, 0);
-}
-
-
-LOCAL(void)
-emit_adobe_app14 (j_compress_ptr cinfo)
-/* Emit an Adobe APP14 marker */
-{
-  /*
-   * Length of APP14 block	(2 bytes)
-   * Block ID			(5 bytes - ASCII "Adobe")
-   * Version Number		(2 bytes - currently 100)
-   * Flags0			(2 bytes - currently 0)
-   * Flags1			(2 bytes - currently 0)
-   * Color transform		(1 byte)
-   *
-   * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
-   * now in circulation seem to use Version = 100, so that's what we write.
-   *
-   * We write the color transform byte as 1 if the JPEG color space is
-   * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
-   * whether the encoder performed a transformation, which is pretty useless.
-   */
-  
-  emit_marker(cinfo, M_APP14);
-  
-  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
-
-  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
-  emit_byte(cinfo, 0x64);
-  emit_byte(cinfo, 0x6F);
-  emit_byte(cinfo, 0x62);
-  emit_byte(cinfo, 0x65);
-  emit_2bytes(cinfo, 100);	/* Version */
-  emit_2bytes(cinfo, 0);	/* Flags0 */
-  emit_2bytes(cinfo, 0);	/* Flags1 */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_YCbCr:
-    emit_byte(cinfo, 1);	/* Color transform = 1 */
-    break;
-  case JCS_YCCK:
-    emit_byte(cinfo, 2);	/* Color transform = 2 */
-    break;
-  default:
-    emit_byte(cinfo, 0);	/* Color transform = 0 */
-  }
-}
-
-
-/*
- * These routines allow writing an arbitrary marker with parameters.
- * The only intended use is to emit COM or APPn markers after calling
- * write_file_header and before calling write_frame_header.
- * Other uses are not guaranteed to produce desirable results.
- * Counting the parameter bytes properly is the caller's responsibility.
- */
-
-METHODDEF(void)
-write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
-/* Emit an arbitrary marker header */
-{
-  if (datalen > (unsigned int) 65533)		/* safety check */
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  emit_marker(cinfo, (JPEG_MARKER) marker);
-
-  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
-}
-
-METHODDEF(void)
-write_marker_byte (j_compress_ptr cinfo, int val)
-/* Emit one byte of marker parameters following write_marker_header */
-{
-  emit_byte(cinfo, val);
-}
-
-
-/*
- * Write datastream header.
- * This consists of an SOI and optional APPn markers.
- * We recommend use of the JFIF marker, but not the Adobe marker,
- * when using YCbCr or grayscale data.  The JFIF marker is also used
- * for other standard JPEG colorspaces.  The Adobe marker is helpful
- * to distinguish RGB, CMYK, and YCCK colorspaces.
- * Note that an application can write additional header markers after
- * jpeg_start_compress returns.
- */
-
-METHODDEF(void)
-write_file_header (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  emit_marker(cinfo, M_SOI);	/* first the SOI */
-
-  /* SOI is defined to reset restart interval to 0 */
-  marker->last_restart_interval = 0;
-
-  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
-    emit_jfif_app0(cinfo);
-  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
-    emit_adobe_app14(cinfo);
-}
-
-
-/*
- * Write frame header.
- * This consists of DQT and SOFn markers,
- * a conditional LSE marker and a conditional pseudo SOS marker.
- * Note that we do not emit the SOF until we have emitted the DQT(s).
- * This avoids compatibility problems with incorrect implementations that
- * try to error-check the quant table numbers as soon as they see the SOF.
- */
-
-METHODDEF(void)
-write_frame_header (j_compress_ptr cinfo)
-{
-  int ci, prec;
-  boolean is_baseline;
-  jpeg_component_info *compptr;
-  
-  /* Emit DQT for each quantization table.
-   * Note that emit_dqt() suppresses any duplicate tables.
-   */
-  prec = 0;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
-  }
-  /* now prec is nonzero iff there are any 16-bit quant tables. */
-
-  /* Check for a non-baseline specification.
-   * Note we assume that Huffman table numbers won't be changed later.
-   */
-  if (cinfo->arith_code || cinfo->progressive_mode ||
-      cinfo->data_precision != 8 || cinfo->block_size != DCTSIZE) {
-    is_baseline = FALSE;
-  } else {
-    is_baseline = TRUE;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
-	is_baseline = FALSE;
-    }
-    if (prec && is_baseline) {
-      is_baseline = FALSE;
-      /* If it's baseline except for quantizer size, warn the user */
-      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
-    }
-  }
-
-  /* Emit the proper SOF marker */
-  if (cinfo->arith_code) {
-    if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
-    else
-      emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
-  } else {
-    if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
-    else if (is_baseline)
-      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
-    else
-      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
-  }
-
-  /* Check to emit LSE inverse color transform specification marker */
-  if (cinfo->color_transform)
-    emit_lse_ict(cinfo);
-
-  /* Check to emit pseudo SOS marker */
-  if (cinfo->progressive_mode && cinfo->block_size != DCTSIZE)
-    emit_pseudo_sos(cinfo);
-}
-
-
-/*
- * Write scan header.
- * This consists of DHT or DAC markers, optional DRI, and SOS.
- * Compressed data will be written following the SOS.
- */
-
-METHODDEF(void)
-write_scan_header (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  int i;
-  jpeg_component_info *compptr;
-
-  if (cinfo->arith_code) {
-    /* Emit arith conditioning info.  We may have some duplication
-     * if the file has multiple scans, but it's so small it's hardly
-     * worth worrying about.
-     */
-    emit_dac(cinfo);
-  } else {
-    /* Emit Huffman tables.
-     * Note that emit_dht() suppresses any duplicate tables.
-     */
-    for (i = 0; i < cinfo->comps_in_scan; i++) {
-      compptr = cinfo->cur_comp_info[i];
-      /* DC needs no table for refinement scan */
-      if (cinfo->Ss == 0 && cinfo->Ah == 0)
-	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
-      /* AC needs no table when not present */
-      if (cinfo->Se)
-	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
-    }
-  }
-
-  /* Emit DRI if required --- note that DRI value could change for each scan.
-   * We avoid wasting space with unnecessary DRIs, however.
-   */
-  if (cinfo->restart_interval != marker->last_restart_interval) {
-    emit_dri(cinfo);
-    marker->last_restart_interval = cinfo->restart_interval;
-  }
-
-  emit_sos(cinfo);
-}
-
-
-/*
- * Write datastream trailer.
- */
-
-METHODDEF(void)
-write_file_trailer (j_compress_ptr cinfo)
-{
-  emit_marker(cinfo, M_EOI);
-}
-
-
-/*
- * Write an abbreviated table-specification datastream.
- * This consists of SOI, DQT and DHT tables, and EOI.
- * Any table that is defined and not marked sent_table = TRUE will be
- * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
- */
-
-METHODDEF(void)
-write_tables_only (j_compress_ptr cinfo)
-{
-  int i;
-
-  emit_marker(cinfo, M_SOI);
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    if (cinfo->quant_tbl_ptrs[i] != NULL)
-      (void) emit_dqt(cinfo, i);
-  }
-
-  if (! cinfo->arith_code) {
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, FALSE);
-      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, TRUE);
-    }
-  }
-
-  emit_marker(cinfo, M_EOI);
-}
-
-
-/*
- * Initialize the marker writer module.
- */
-
-GLOBAL(void)
-jinit_marker_writer (j_compress_ptr cinfo)
-{
-  my_marker_ptr marker;
-
-  /* Create the subobject */
-  marker = (my_marker_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_marker_writer));
-  cinfo->marker = &marker->pub;
-  /* Initialize method pointers */
-  marker->pub.write_file_header = write_file_header;
-  marker->pub.write_frame_header = write_frame_header;
-  marker->pub.write_scan_header = write_scan_header;
-  marker->pub.write_file_trailer = write_file_trailer;
-  marker->pub.write_tables_only = write_tables_only;
-  marker->pub.write_marker_header = write_marker_header;
-  marker->pub.write_marker_byte = write_marker_byte;
-  /* Initialize private state */
-  marker->last_restart_interval = 0;
-}
diff --git a/cmake/externals/libjpeg/jcmaster.c b/cmake/externals/libjpeg/jcmaster.c
deleted file mode 100644
index a70af0c02..000000000
--- a/cmake/externals/libjpeg/jcmaster.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/*
- * jcmaster.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains master control logic for the JPEG compressor.
- * These routines are concerned with parameter validation, initial setup,
- * and inter-pass control (determining the number of passes and the work 
- * to be done in each pass).
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef enum {
-	main_pass,		/* input data, also do first output step */
-	huff_opt_pass,		/* Huffman code optimization pass */
-	output_pass		/* data output pass */
-} c_pass_type;
-
-typedef struct {
-  struct jpeg_comp_master pub;	/* public fields */
-
-  c_pass_type pass_type;	/* the type of the current pass */
-
-  int pass_number;		/* # of passes completed */
-  int total_passes;		/* total # of passes needed */
-
-  int scan_number;		/* current index in scan_info[] */
-} my_comp_master;
-
-typedef my_comp_master * my_master_ptr;
-
-
-/*
- * Support routines that do various essential calculations.
- */
-
-LOCAL(void)
-initial_setup (j_compress_ptr cinfo)
-/* Do computations that are needed before master selection phase */
-{
-  int ci, ssize;
-  jpeg_component_info *compptr;
-
-  /* Sanity check on block_size */
-  if (cinfo->block_size < 1 || cinfo->block_size > 16)
-    ERREXIT2(cinfo, JERR_BAD_DCTSIZE, cinfo->block_size, cinfo->block_size);
-
-  /* Derive natural_order from block_size */
-  switch (cinfo->block_size) {
-  case 2: cinfo->natural_order = jpeg_natural_order2; break;
-  case 3: cinfo->natural_order = jpeg_natural_order3; break;
-  case 4: cinfo->natural_order = jpeg_natural_order4; break;
-  case 5: cinfo->natural_order = jpeg_natural_order5; break;
-  case 6: cinfo->natural_order = jpeg_natural_order6; break;
-  case 7: cinfo->natural_order = jpeg_natural_order7; break;
-  default: cinfo->natural_order = jpeg_natural_order;
-  }
-
-  /* Derive lim_Se from block_size */
-  cinfo->lim_Se = cinfo->block_size < DCTSIZE ?
-    cinfo->block_size * cinfo->block_size - 1 : DCTSIZE2-1;
-
-  /* Sanity check on image dimensions */
-  if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 ||
-      cinfo->num_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  /* Make sure image isn't bigger than I can handle */
-  if ((long) cinfo->jpeg_height > (long) JPEG_MAX_DIMENSION ||
-      (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
-  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Check that number of components won't exceed internal array sizes */
-  if (cinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
-
-  /* Compute maximum sampling factors; check factor validity */
-  cinfo->max_h_samp_factor = 1;
-  cinfo->max_v_samp_factor = 1;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
-      ERREXIT(cinfo, JERR_BAD_SAMPLING);
-    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
-    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
-  }
-
-  /* Compute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Fill in the correct component_index value; don't rely on application */
-    compptr->component_index = ci;
-    /* In selecting the actual DCT scaling for each component, we try to
-     * scale down the chroma components via DCT scaling rather than downsampling.
-     * This saves time if the downsampler gets to use 1:1 scaling.
-     * Note this code adapts subsampling ratios which are powers of 2.
-     */
-    ssize = 1;
-#ifdef DCT_SCALING_SUPPORTED
-    if (! cinfo->raw_data_in)
-      while (cinfo->min_DCT_h_scaled_size * ssize <=
-	     (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-#endif
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
-    ssize = 1;
-#ifdef DCT_SCALING_SUPPORTED
-    if (! cinfo->raw_data_in)
-      while (cinfo->min_DCT_v_scaled_size * ssize <=
-	     (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-#endif
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
-
-    /* We don't support DCT ratios larger than 2. */
-    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
-	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
-    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
-	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
-
-    /* Size in DCT blocks */
-    compptr->width_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->height_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Size in samples */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width *
-		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_height *
-		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Don't need quantization scale after DCT,
-     * until color conversion says otherwise.
-     */
-    compptr->component_needed = FALSE;
-  }
-
-  /* Compute number of fully interleaved MCU rows (number of times that
-   * main controller will call coefficient controller).
-   */
-  cinfo->total_iMCU_rows = (JDIMENSION)
-    jdiv_round_up((long) cinfo->jpeg_height,
-		  (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-}
-
-
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-
-LOCAL(void)
-validate_script (j_compress_ptr cinfo)
-/* Verify that the scan script in cinfo->scan_info[] is valid; also
- * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
- */
-{
-  const jpeg_scan_info * scanptr;
-  int scanno, ncomps, ci, coefi, thisi;
-  int Ss, Se, Ah, Al;
-  boolean component_sent[MAX_COMPONENTS];
-#ifdef C_PROGRESSIVE_SUPPORTED
-  int * last_bitpos_ptr;
-  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
-  /* -1 until that coefficient has been seen; then last Al for it */
-#endif
-
-  if (cinfo->num_scans <= 0)
-    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
-
-  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
-   * for progressive JPEG, no scan can have this.
-   */
-  scanptr = cinfo->scan_info;
-  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-    cinfo->progressive_mode = TRUE;
-    last_bitpos_ptr = & last_bitpos[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      for (coefi = 0; coefi < DCTSIZE2; coefi++)
-	*last_bitpos_ptr++ = -1;
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    cinfo->progressive_mode = FALSE;
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      component_sent[ci] = FALSE;
-  }
-
-  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
-    /* Validate component indexes */
-    ncomps = scanptr->comps_in_scan;
-    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
-    for (ci = 0; ci < ncomps; ci++) {
-      thisi = scanptr->component_index[ci];
-      if (thisi < 0 || thisi >= cinfo->num_components)
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-      /* Components must appear in SOF order within each scan */
-      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-    }
-    /* Validate progression parameters */
-    Ss = scanptr->Ss;
-    Se = scanptr->Se;
-    Ah = scanptr->Ah;
-    Al = scanptr->Al;
-    if (cinfo->progressive_mode) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
-       * seems wrong: the upper bound ought to depend on data precision.
-       * Perhaps they really meant 0..N+1 for N-bit precision.
-       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
-       * out-of-range reconstructed DC values during the first DC scan,
-       * which might cause problems for some decoders.
-       */
-      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
-	  Ah < 0 || Ah > (cinfo->data_precision > 8 ? 13 : 10) ||
-	  Al < 0 || Al > (cinfo->data_precision > 8 ? 13 : 10))
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      if (Ss == 0) {
-	if (Se != 0)		/* DC and AC together not OK */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      } else {
-	if (ncomps != 1)	/* AC scans must be for only one component */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      }
-      for (ci = 0; ci < ncomps; ci++) {
-	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
-	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	for (coefi = Ss; coefi <= Se; coefi++) {
-	  if (last_bitpos_ptr[coefi] < 0) {
-	    /* first scan of this coefficient */
-	    if (Ah != 0)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  } else {
-	    /* not first scan */
-	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  }
-	  last_bitpos_ptr[coefi] = Al;
-	}
-      }
-#endif
-    } else {
-      /* For sequential JPEG, all progression parameters must be these: */
-      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-      /* Make sure components are not sent twice */
-      for (ci = 0; ci < ncomps; ci++) {
-	thisi = scanptr->component_index[ci];
-	if (component_sent[thisi])
-	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-	component_sent[thisi] = TRUE;
-      }
-    }
-  }
-
-  /* Now verify that everything got sent. */
-  if (cinfo->progressive_mode) {
-#ifdef C_PROGRESSIVE_SUPPORTED
-    /* For progressive mode, we only check that at least some DC data
-     * got sent for each component; the spec does not require that all bits
-     * of all coefficients be transmitted.  Would it be wiser to enforce
-     * transmission of all coefficient bits??
-     */
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      if (last_bitpos[ci][0] < 0)
-	ERREXIT(cinfo, JERR_MISSING_DATA);
-    }
-#endif
-  } else {
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      if (! component_sent[ci])
-	ERREXIT(cinfo, JERR_MISSING_DATA);
-    }
-  }
-}
-
-
-LOCAL(void)
-reduce_script (j_compress_ptr cinfo)
-/* Adapt scan script for use with reduced block size;
- * assume that script has been validated before.
- */
-{
-  jpeg_scan_info * scanptr;
-  int idxout, idxin;
-
-  /* Circumvent const declaration for this function */
-  scanptr = (jpeg_scan_info *) cinfo->scan_info;
-  idxout = 0;
-
-  for (idxin = 0; idxin < cinfo->num_scans; idxin++) {
-    /* After skipping, idxout becomes smaller than idxin */
-    if (idxin != idxout)
-      /* Copy rest of data;
-       * note we stay in given chunk of allocated memory.
-       */
-      scanptr[idxout] = scanptr[idxin];
-    if (scanptr[idxout].Ss > cinfo->lim_Se)
-      /* Entire scan out of range - skip this entry */
-      continue;
-    if (scanptr[idxout].Se > cinfo->lim_Se)
-      /* Limit scan to end of block */
-      scanptr[idxout].Se = cinfo->lim_Se;
-    idxout++;
-  }
-
-  cinfo->num_scans = idxout;
-}
-
-#endif /* C_MULTISCAN_FILES_SUPPORTED */
-
-
-LOCAL(void)
-select_scan_parameters (j_compress_ptr cinfo)
-/* Set up the scan parameters for the current scan */
-{
-  int ci;
-
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-  if (cinfo->scan_info != NULL) {
-    /* Prepare for current scan --- the script is already validated */
-    my_master_ptr master = (my_master_ptr) cinfo->master;
-    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
-
-    cinfo->comps_in_scan = scanptr->comps_in_scan;
-    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
-      cinfo->cur_comp_info[ci] =
-	&cinfo->comp_info[scanptr->component_index[ci]];
-    }
-    if (cinfo->progressive_mode) {
-      cinfo->Ss = scanptr->Ss;
-      cinfo->Se = scanptr->Se;
-      cinfo->Ah = scanptr->Ah;
-      cinfo->Al = scanptr->Al;
-      return;
-    }
-  }
-  else
-#endif
-  {
-    /* Prepare for single sequential-JPEG scan containing all components */
-    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPS_IN_SCAN);
-    cinfo->comps_in_scan = cinfo->num_components;
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
-    }
-  }
-  cinfo->Ss = 0;
-  cinfo->Se = cinfo->block_size * cinfo->block_size - 1;
-  cinfo->Ah = 0;
-  cinfo->Al = 0;
-}
-
-
-LOCAL(void)
-per_scan_setup (j_compress_ptr cinfo)
-/* Do computations that are needed before processing a JPEG scan */
-/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
-{
-  int ci, mcublks, tmp;
-  jpeg_component_info *compptr;
-
-  if (cinfo->comps_in_scan == 1) {
-
-    /* Noninterleaved (single-component) scan */
-    compptr = cinfo->cur_comp_info[0];
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = compptr->width_in_blocks;
-    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-
-    /* For noninterleaved scan, always one block per MCU */
-    compptr->MCU_width = 1;
-    compptr->MCU_height = 1;
-    compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
-    compptr->last_col_width = 1;
-    /* For noninterleaved scans, it is convenient to define last_row_height
-     * as the number of block rows present in the last iMCU row.
-     */
-    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-    if (tmp == 0) tmp = compptr->v_samp_factor;
-    compptr->last_row_height = tmp;
-
-    /* Prepare array describing MCU composition */
-    cinfo->blocks_in_MCU = 1;
-    cinfo->MCU_membership[0] = 0;
-
-  } else {
-
-    /* Interleaved (multi-component) scan */
-    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = (JDIMENSION)
-      jdiv_round_up((long) cinfo->jpeg_width,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
-
-    cinfo->blocks_in_MCU = 0;
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Sampling factors give # of blocks of component in each MCU */
-      compptr->MCU_width = compptr->h_samp_factor;
-      compptr->MCU_height = compptr->v_samp_factor;
-      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
-      /* Figure number of non-dummy blocks in last MCU column & row */
-      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
-      if (tmp == 0) tmp = compptr->MCU_width;
-      compptr->last_col_width = tmp;
-      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
-      if (tmp == 0) tmp = compptr->MCU_height;
-      compptr->last_row_height = tmp;
-      /* Prepare array describing MCU composition */
-      mcublks = compptr->MCU_blocks;
-      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
-      while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
-      }
-    }
-
-  }
-
-  /* Convert restart specified in rows to actual MCU count. */
-  /* Note that count must fit in 16 bits, so we provide limiting. */
-  if (cinfo->restart_in_rows > 0) {
-    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
-    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
-  }
-}
-
-
-/*
- * Per-pass setup.
- * This is called at the beginning of each pass.  We determine which modules
- * will be active during this pass and give them appropriate start_pass calls.
- * We also set is_last_pass to indicate whether any more passes will be
- * required.
- */
-
-METHODDEF(void)
-prepare_for_pass (j_compress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  switch (master->pass_type) {
-  case main_pass:
-    /* Initial pass: will collect input data, and do either Huffman
-     * optimization or data output for the first scan.
-     */
-    select_scan_parameters(cinfo);
-    per_scan_setup(cinfo);
-    if (! cinfo->raw_data_in) {
-      (*cinfo->cconvert->start_pass) (cinfo);
-      (*cinfo->downsample->start_pass) (cinfo);
-      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
-    }
-    (*cinfo->fdct->start_pass) (cinfo);
-    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
-    (*cinfo->coef->start_pass) (cinfo,
-				(master->total_passes > 1 ?
-				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
-    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
-    if (cinfo->optimize_coding) {
-      /* No immediate data output; postpone writing frame/scan headers */
-      master->pub.call_pass_startup = FALSE;
-    } else {
-      /* Will write frame/scan headers at first jpeg_write_scanlines call */
-      master->pub.call_pass_startup = TRUE;
-    }
-    break;
-#ifdef ENTROPY_OPT_SUPPORTED
-  case huff_opt_pass:
-    /* Do Huffman optimization for a scan after the first one. */
-    select_scan_parameters(cinfo);
-    per_scan_setup(cinfo);
-    if (cinfo->Ss != 0 || cinfo->Ah == 0) {
-      (*cinfo->entropy->start_pass) (cinfo, TRUE);
-      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
-      master->pub.call_pass_startup = FALSE;
-      break;
-    }
-    /* Special case: Huffman DC refinement scans need no Huffman table
-     * and therefore we can skip the optimization pass for them.
-     */
-    master->pass_type = output_pass;
-    master->pass_number++;
-    /*FALLTHROUGH*/
-#endif
-  case output_pass:
-    /* Do a data-output pass. */
-    /* We need not repeat per-scan setup if prior optimization pass did it. */
-    if (! cinfo->optimize_coding) {
-      select_scan_parameters(cinfo);
-      per_scan_setup(cinfo);
-    }
-    (*cinfo->entropy->start_pass) (cinfo, FALSE);
-    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
-    /* We emit frame/scan headers now */
-    if (master->scan_number == 0)
-      (*cinfo->marker->write_frame_header) (cinfo);
-    (*cinfo->marker->write_scan_header) (cinfo);
-    master->pub.call_pass_startup = FALSE;
-    break;
-  default:
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-  }
-
-  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
-
-  /* Set up progress monitor's pass info if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->completed_passes = master->pass_number;
-    cinfo->progress->total_passes = master->total_passes;
-  }
-}
-
-
-/*
- * Special start-of-pass hook.
- * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
- * In single-pass processing, we need this hook because we don't want to
- * write frame/scan headers during jpeg_start_compress; we want to let the
- * application write COM markers etc. between jpeg_start_compress and the
- * jpeg_write_scanlines loop.
- * In multi-pass processing, this routine is not used.
- */
-
-METHODDEF(void)
-pass_startup (j_compress_ptr cinfo)
-{
-  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
-
-  (*cinfo->marker->write_frame_header) (cinfo);
-  (*cinfo->marker->write_scan_header) (cinfo);
-}
-
-
-/*
- * Finish up at end of pass.
- */
-
-METHODDEF(void)
-finish_pass_master (j_compress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  /* The entropy coder always needs an end-of-pass call,
-   * either to analyze statistics or to flush its output buffer.
-   */
-  (*cinfo->entropy->finish_pass) (cinfo);
-
-  /* Update state for next pass */
-  switch (master->pass_type) {
-  case main_pass:
-    /* next pass is either output of scan 0 (after optimization)
-     * or output of scan 1 (if no optimization).
-     */
-    master->pass_type = output_pass;
-    if (! cinfo->optimize_coding)
-      master->scan_number++;
-    break;
-  case huff_opt_pass:
-    /* next pass is always output of current scan */
-    master->pass_type = output_pass;
-    break;
-  case output_pass:
-    /* next pass is either optimization or output of next scan */
-    if (cinfo->optimize_coding)
-      master->pass_type = huff_opt_pass;
-    master->scan_number++;
-    break;
-  }
-
-  master->pass_number++;
-}
-
-
-/*
- * Initialize master compression control.
- */
-
-GLOBAL(void)
-jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
-{
-  my_master_ptr master;
-
-  master = (my_master_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_comp_master));
-  cinfo->master = &master->pub;
-  master->pub.prepare_for_pass = prepare_for_pass;
-  master->pub.pass_startup = pass_startup;
-  master->pub.finish_pass = finish_pass_master;
-  master->pub.is_last_pass = FALSE;
-
-  /* Validate parameters, determine derived values */
-  initial_setup(cinfo);
-
-  if (cinfo->scan_info != NULL) {
-#ifdef C_MULTISCAN_FILES_SUPPORTED
-    validate_script(cinfo);
-    if (cinfo->block_size < DCTSIZE)
-      reduce_script(cinfo);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    cinfo->progressive_mode = FALSE;
-    cinfo->num_scans = 1;
-  }
-
-  if (cinfo->optimize_coding)
-    cinfo->arith_code = FALSE; /* disable arithmetic coding */
-  else if (! cinfo->arith_code &&
-	   (cinfo->progressive_mode ||
-	    (cinfo->block_size > 1 && cinfo->block_size < DCTSIZE)))
-    /* TEMPORARY HACK ??? */
-    /* assume default tables no good for progressive or reduced AC mode */
-    cinfo->optimize_coding = TRUE; /* force Huffman optimization */
-
-  /* Initialize my private state */
-  if (transcode_only) {
-    /* no main pass in transcoding */
-    if (cinfo->optimize_coding)
-      master->pass_type = huff_opt_pass;
-    else
-      master->pass_type = output_pass;
-  } else {
-    /* for normal compression, first pass is always this type: */
-    master->pass_type = main_pass;
-  }
-  master->scan_number = 0;
-  master->pass_number = 0;
-  if (cinfo->optimize_coding)
-    master->total_passes = cinfo->num_scans * 2;
-  else
-    master->total_passes = cinfo->num_scans;
-}
diff --git a/cmake/externals/libjpeg/jcomapi.c b/cmake/externals/libjpeg/jcomapi.c
deleted file mode 100644
index 678c5d12d..000000000
--- a/cmake/externals/libjpeg/jcomapi.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * jcomapi.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface routines that are used for both
- * compression and decompression.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Abort processing of a JPEG compression or decompression operation,
- * but don't destroy the object itself.
- *
- * For this, we merely clean up all the nonpermanent memory pools.
- * Note that temp files (virtual arrays) are not allowed to belong to
- * the permanent pool, so we will be able to close all temp files here.
- * Closing a data source or destination, if necessary, is the application's
- * responsibility.
- */
-
-GLOBAL(void)
-jpeg_abort (j_common_ptr cinfo)
-{
-  int pool;
-
-  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
-  if (cinfo->mem == NULL)
-    return;
-
-  /* Releasing pools in reverse order might help avoid fragmentation
-   * with some (brain-damaged) malloc libraries.
-   */
-  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) {
-    (*cinfo->mem->free_pool) (cinfo, pool);
-  }
-
-  /* Reset overall state for possible reuse of object */
-  if (cinfo->is_decompressor) {
-    cinfo->global_state = DSTATE_START;
-    /* Try to keep application from accessing now-deleted marker list.
-     * A bit kludgy to do it here, but this is the most central place.
-     */
-    ((j_decompress_ptr) cinfo)->marker_list = NULL;
-  } else {
-    cinfo->global_state = CSTATE_START;
-  }
-}
-
-
-/*
- * Destruction of a JPEG object.
- *
- * Everything gets deallocated except the master jpeg_compress_struct itself
- * and the error manager struct.  Both of these are supplied by the application
- * and must be freed, if necessary, by the application.  (Often they are on
- * the stack and so don't need to be freed anyway.)
- * Closing a data source or destination, if necessary, is the application's
- * responsibility.
- */
-
-GLOBAL(void)
-jpeg_destroy (j_common_ptr cinfo)
-{
-  /* We need only tell the memory manager to release everything. */
-  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
-  if (cinfo->mem != NULL)
-    (*cinfo->mem->self_destruct) (cinfo);
-  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
-  cinfo->global_state = 0;	/* mark it destroyed */
-}
-
-
-/*
- * Convenience routines for allocating quantization and Huffman tables.
- * (Would jutils.c be a more reasonable place to put these?)
- */
-
-GLOBAL(JQUANT_TBL *)
-jpeg_alloc_quant_table (j_common_ptr cinfo)
-{
-  JQUANT_TBL *tbl;
-
-  tbl = (JQUANT_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
-  return tbl;
-}
-
-
-GLOBAL(JHUFF_TBL *)
-jpeg_alloc_huff_table (j_common_ptr cinfo)
-{
-  JHUFF_TBL *tbl;
-
-  tbl = (JHUFF_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
-  return tbl;
-}
-
-
-/*
- * Set up the standard Huffman tables (cf. JPEG standard section K.3).
- * IMPORTANT: these are only valid for 8-bit data precision!
- * (Would jutils.c be a more reasonable place to put this?)
- */
-
-GLOBAL(JHUFF_TBL *)
-jpeg_std_huff_table (j_common_ptr cinfo, boolean isDC, int tblno)
-{
-  JHUFF_TBL **htblptr, *htbl;
-  const UINT8 *bits, *val;
-  int nsymbols, len;
-
-  static const UINT8 bits_dc_luminance[17] =
-    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_luminance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
-  static const UINT8 bits_dc_chrominance[17] =
-    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_chrominance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
-  static const UINT8 bits_ac_luminance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
-  static const UINT8 val_ac_luminance[] =
-    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-
-  static const UINT8 bits_ac_chrominance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
-  static const UINT8 val_ac_chrominance[] =
-    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-
-  if (cinfo->is_decompressor) {
-    if (isDC)
-      htblptr = ((j_decompress_ptr) cinfo)->dc_huff_tbl_ptrs;
-    else
-      htblptr = ((j_decompress_ptr) cinfo)->ac_huff_tbl_ptrs;
-  } else {
-    if (isDC)
-      htblptr = ((j_compress_ptr) cinfo)->dc_huff_tbl_ptrs;
-    else
-      htblptr = ((j_compress_ptr) cinfo)->ac_huff_tbl_ptrs;
-  }
-
-  switch (tblno) {
-  case 0:
-    if (isDC) {
-      bits = bits_dc_luminance;
-      val = val_dc_luminance;
-    } else {
-      bits = bits_ac_luminance;
-      val = val_ac_luminance;
-    }
-    break;
-  case 1:
-    if (isDC) {
-      bits = bits_dc_chrominance;
-      val = val_dc_chrominance;
-    } else {
-      bits = bits_ac_chrominance;
-      val = val_ac_chrominance;
-    }
-    break;
-  default:
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-    return NULL; /* avoid compiler warnings for uninitialized variables */
-  }
-
-  if (htblptr[tblno] == NULL)
-    htblptr[tblno] = jpeg_alloc_huff_table(cinfo);
-
-  htbl = htblptr[tblno];
-
-  /* Copy the number-of-symbols-of-each-code-length counts */
-  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
-
-  /* Validate the counts.  We do this here mainly so we can copy the right
-   * number of symbols from the val[] array, without risking marching off
-   * the end of memory.  jxhuff.c will do a more thorough test later.
-   */
-  nsymbols = 0;
-  for (len = 1; len <= 16; len++)
-    nsymbols += bits[len];
-  if (nsymbols > 256)
-    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
-  if (nsymbols > 0)
-    MEMCOPY(htbl->huffval, val, nsymbols * SIZEOF(UINT8));
-
-  /* Initialize sent_table FALSE so table will be written to JPEG file. */
-  htbl->sent_table = FALSE;
-
-  return htbl;
-}
diff --git a/cmake/externals/libjpeg/jconfig.h b/cmake/externals/libjpeg/jconfig.h
deleted file mode 100644
index 8d6c8f5b3..000000000
--- a/cmake/externals/libjpeg/jconfig.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 9x or NT.
- * This file also works for Borland/Embarcadero C++ for Win32 or Win64
- * (CLI: bcc32, bcc32c, bcc32x, bcc64; GUI IDE: C++Builder/RAD Studio).
- * See jconfig.txt for explanations.
- */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS	/* we presume a 32-bit flat memory model */
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-/* Define "boolean" as unsigned char, not enum, per Windows custom */
-#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
-typedef unsigned char boolean;
-#endif
-#ifndef FALSE			/* in case these macros already exist */
-#define FALSE	0		/* values of boolean */
-#endif
-#ifndef TRUE
-#define TRUE	1
-#endif
-#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
-
-/* Define custom RGB color order, prevent jmorecfg.h from redefinition */
-#undef JPEG_HAVE_RGB_CUSTOM
-/* Use Windows custom BGR color order defined in jmorecfg.h */
-#undef JPEG_USE_RGB_CUSTOM
-
-/* Define custom file I/O functions, prevent jinclude.h from redefinition */
-#undef JPEG_HAVE_FILE_IO_CUSTOM
-/* Use Delphi custom file I/O functions defined in jinclude.h */
-#undef JPEG_USE_FILE_IO_CUSTOM
-
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE	/* optional */
-#define USE_SETMODE	/* Microsoft/Borland/Embarcadero have setmode() */
-#undef NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/cmake/externals/libjpeg/jcparam.c b/cmake/externals/libjpeg/jcparam.c
deleted file mode 100644
index 261ae86ca..000000000
--- a/cmake/externals/libjpeg/jcparam.c
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * jcparam.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains optional default-setting code for the JPEG compressor.
- * Applications do not have to use this file, but those that don't use it
- * must know a lot more about the innards of the JPEG code.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Quantization table setup routines
- */
-
-GLOBAL(void)
-jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
-		      const unsigned int *basic_table,
-		      int scale_factor, boolean force_baseline)
-/* Define a quantization table equal to the basic_table times
- * a scale factor (given as a percentage).
- * If force_baseline is TRUE, the computed quantization table entries
- * are limited to 1..255 for JPEG baseline compatibility.
- */
-{
-  JQUANT_TBL ** qtblptr;
-  int i;
-  long temp;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
-    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
-
-  qtblptr = & cinfo->quant_tbl_ptrs[which_tbl];
-
-  if (*qtblptr == NULL)
-    *qtblptr = jpeg_alloc_quant_table((j_common_ptr) cinfo);
-
-  for (i = 0; i < DCTSIZE2; i++) {
-    temp = ((long) basic_table[i] * scale_factor + 50L) / 100L;
-    /* limit the values to the valid range */
-    if (temp <= 0L) temp = 1L;
-    if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
-    if (force_baseline && temp > 255L)
-      temp = 255L;		/* limit to baseline range if requested */
-    (*qtblptr)->quantval[i] = (UINT16) temp;
-  }
-
-  /* Initialize sent_table FALSE so table will be written to JPEG file. */
-  (*qtblptr)->sent_table = FALSE;
-}
-
-
-/* These are the sample quantization tables given in JPEG spec section K.1.
- * NOTE: chrominance DC value is changed from 17 to 16 for lossless support.
- * The spec says that the values given produce "good" quality,
- * and when divided by 2, "very good" quality.
- */
-static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
-  16,  11,  10,  16,  24,  40,  51,  61,
-  12,  12,  14,  19,  26,  58,  60,  55,
-  14,  13,  16,  24,  40,  57,  69,  56,
-  14,  17,  22,  29,  51,  87,  80,  62,
-  18,  22,  37,  56,  68, 109, 103,  77,
-  24,  35,  55,  64,  81, 104, 113,  92,
-  49,  64,  78,  87, 103, 121, 120, 101,
-  72,  92,  95,  98, 112, 100, 103,  99
-};
-static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
-  16,  18,  24,  47,  99,  99,  99,  99,
-  18,  21,  26,  66,  99,  99,  99,  99,
-  24,  26,  56,  99,  99,  99,  99,  99,
-  47,  66,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99,
-  99,  99,  99,  99,  99,  99,  99,  99
-};
-
-
-GLOBAL(void)
-jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables
- * and straight percentage-scaling quality scales.
- * This entry point allows different scalings for luminance and chrominance.
- */
-{
-  /* Set up two quantization tables using the specified scaling */
-  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       cinfo->q_scale_factor[0], force_baseline);
-  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       cinfo->q_scale_factor[1], force_baseline);
-}
-
-
-GLOBAL(void)
-jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
-			 boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables
- * and a straight percentage-scaling quality scale.  In most cases it's better
- * to use jpeg_set_quality (below); this entry point is provided for
- * applications that insist on a linear percentage scaling.
- */
-{
-  /* Set up two quantization tables using the specified scaling */
-  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       scale_factor, force_baseline);
-  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       scale_factor, force_baseline);
-}
-
-
-GLOBAL(int)
-jpeg_quality_scaling (int quality)
-/* Convert a user-specified quality rating to a percentage scaling factor
- * for an underlying quantization table, using our recommended scaling curve.
- * The input 'quality' factor should be 0 (terrible) to 100 (very good).
- */
-{
-  /* Safety limit on quality factor.  Convert 0 to 1 to avoid zero divide. */
-  if (quality <= 0) quality = 1;
-  if (quality > 100) quality = 100;
-
-  /* The basic table is used as-is (scaling 100) for a quality of 50.
-   * Qualities 50..100 are converted to scaling percentage 200 - 2*Q;
-   * note that at Q=100 the scaling is 0, which will cause jpeg_add_quant_table
-   * to make all the table entries 1 (hence, minimum quantization loss).
-   * Qualities 1..50 are converted to scaling percentage 5000/Q.
-   */
-  if (quality < 50)
-    quality = 5000 / quality;
-  else
-    quality = 200 - quality*2;
-
-  return quality;
-}
-
-
-GLOBAL(void)
-jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
-/* Set or change the 'quality' (quantization) setting, using default tables.
- * This is the standard quality-adjusting entry point for typical user
- * interfaces; only those who want detailed control over quantization tables
- * would use the preceding routines directly.
- */
-{
-  /* Convert user 0-100 rating to percentage scaling */
-  quality = jpeg_quality_scaling(quality);
-
-  /* Set up standard quality tables */
-  jpeg_set_linear_quality(cinfo, quality, force_baseline);
-}
-
-
-/*
- * Reset standard Huffman tables
- */
-
-LOCAL(void)
-std_huff_tables (j_compress_ptr cinfo)
-{
-  if (cinfo->dc_huff_tbl_ptrs[0] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, TRUE, 0);
-
-  if (cinfo->ac_huff_tbl_ptrs[0] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, FALSE, 0);
-
-  if (cinfo->dc_huff_tbl_ptrs[1] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, TRUE, 1);
-
-  if (cinfo->ac_huff_tbl_ptrs[1] != NULL)
-    (void) jpeg_std_huff_table((j_common_ptr) cinfo, FALSE, 1);
-}
-
-
-/*
- * Default parameter setup for compression.
- *
- * Applications that don't choose to use this routine must do their
- * own setup of all these parameters.  Alternately, you can call this
- * to establish defaults and then alter parameters selectively.  This
- * is the recommended approach since, if we add any new parameters,
- * your code will still work (they'll be set to reasonable defaults).
- */
-
-GLOBAL(void)
-jpeg_set_defaults (j_compress_ptr cinfo)
-{
-  int i;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Allocate comp_info array large enough for maximum component count.
-   * Array is made permanent in case application wants to compress
-   * multiple images at same param settings.
-   */
-  if (cinfo->comp_info == NULL)
-    cinfo->comp_info = (jpeg_component_info *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
-
-  /* Initialize everything not dependent on the color space */
-
-  cinfo->scale_num = 1;		/* 1:1 scaling */
-  cinfo->scale_denom = 1;
-  cinfo->data_precision = BITS_IN_JSAMPLE;
-  /* Set up two quantization tables using default quality of 75 */
-  jpeg_set_quality(cinfo, 75, TRUE);
-  /* Reset standard Huffman tables */
-  std_huff_tables(cinfo);
-
-  /* Initialize default arithmetic coding conditioning */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    cinfo->arith_dc_L[i] = 0;
-    cinfo->arith_dc_U[i] = 1;
-    cinfo->arith_ac_K[i] = 5;
-  }
-
-  /* Default is no multiple-scan output */
-  cinfo->scan_info = NULL;
-  cinfo->num_scans = 0;
-
-  /* Expect normal source image, not raw downsampled data */
-  cinfo->raw_data_in = FALSE;
-
-  /* The standard Huffman tables are only valid for 8-bit data precision.
-   * If the precision is higher, use arithmetic coding.
-   * (Alternatively, using Huffman coding would be possible with forcing
-   * optimization on so that usable tables will be computed, or by
-   * supplying default tables that are valid for the desired precision.)
-   * Otherwise, use Huffman coding by default.
-   */
-  cinfo->arith_code = cinfo->data_precision > 8 ? TRUE : FALSE;
-
-  /* By default, don't do extra passes to optimize entropy coding */
-  cinfo->optimize_coding = FALSE;
-
-  /* By default, use the simpler non-cosited sampling alignment */
-  cinfo->CCIR601_sampling = FALSE;
-
-  /* By default, apply fancy downsampling */
-  cinfo->do_fancy_downsampling = TRUE;
-
-  /* No input smoothing */
-  cinfo->smoothing_factor = 0;
-
-  /* DCT algorithm preference */
-  cinfo->dct_method = JDCT_DEFAULT;
-
-  /* No restart markers */
-  cinfo->restart_interval = 0;
-  cinfo->restart_in_rows = 0;
-
-  /* Fill in default JFIF marker parameters.  Note that whether the marker
-   * will actually be written is determined by jpeg_set_colorspace.
-   *
-   * By default, the library emits JFIF version code 1.01.
-   * An application that wants to emit JFIF 1.02 extension markers should set
-   * JFIF_minor_version to 2.  We could probably get away with just defaulting
-   * to 1.02, but there may still be some decoders in use that will complain
-   * about that; saying 1.01 should minimize compatibility problems.
-   *
-   * For wide gamut colorspaces (BG_RGB and BG_YCC), the major version will be
-   * overridden by jpeg_set_colorspace and set to 2.
-   */
-  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
-  cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
-  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
-  cinfo->Y_density = 1;
-
-  /* No color transform */
-  cinfo->color_transform = JCT_NONE;
-
-  /* Choose JPEG colorspace based on input space, set defaults accordingly */
-
-  jpeg_default_colorspace(cinfo);
-}
-
-
-/*
- * Select an appropriate JPEG colorspace for in_color_space.
- */
-
-GLOBAL(void)
-jpeg_default_colorspace (j_compress_ptr cinfo)
-{
-  switch (cinfo->in_color_space) {
-  case JCS_UNKNOWN:
-    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
-    break;
-  case JCS_GRAYSCALE:
-    jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
-    break;
-  case JCS_RGB:
-    jpeg_set_colorspace(cinfo, JCS_YCbCr);
-    break;
-  case JCS_YCbCr:
-    jpeg_set_colorspace(cinfo, JCS_YCbCr);
-    break;
-  case JCS_CMYK:
-    jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
-    break;
-  case JCS_YCCK:
-    jpeg_set_colorspace(cinfo, JCS_YCCK);
-    break;
-  case JCS_BG_RGB:
-    /* No translation for now -- conversion to BG_YCC not yet supportet */
-    jpeg_set_colorspace(cinfo, JCS_BG_RGB);
-    break;
-  case JCS_BG_YCC:
-    jpeg_set_colorspace(cinfo, JCS_BG_YCC);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
-  }
-}
-
-
-/*
- * Set the JPEG colorspace, and choose colorspace-dependent default values.
- */
-
-GLOBAL(void)
-jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
-{
-  jpeg_component_info * compptr;
-  int ci;
-
-#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
-  (compptr = &cinfo->comp_info[index], \
-   compptr->component_id = (id), \
-   compptr->h_samp_factor = (hsamp), \
-   compptr->v_samp_factor = (vsamp), \
-   compptr->quant_tbl_no = (quant), \
-   compptr->dc_tbl_no = (dctbl), \
-   compptr->ac_tbl_no = (actbl) )
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
-   * tables 1 for chrominance components.
-   */
-
-  cinfo->jpeg_color_space = colorspace;
-
-  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
-  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
-
-  switch (colorspace) {
-  case JCS_UNKNOWN:
-    cinfo->num_components = cinfo->input_components;
-    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPONENTS);
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      SET_COMP(ci, ci, 1,1, 0, 0,0);
-    }
-    break;
-  case JCS_GRAYSCALE:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->num_components = 1;
-    /* JFIF specifies component ID 1 */
-    SET_COMP(0, 0x01, 1,1, 0, 0,0);
-    break;
-  case JCS_RGB:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
-    cinfo->num_components = 3;
-    SET_COMP(0, 0x52 /* 'R' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x42 /* 'B' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    break;
-  case JCS_YCbCr:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->num_components = 3;
-    /* JFIF specifies component IDs 1,2,3 */
-    /* We default to 2x2 subsamples of chrominance */
-    SET_COMP(0, 0x01, 2,2, 0, 0,0);
-    SET_COMP(1, 0x02, 1,1, 1, 1,1);
-    SET_COMP(2, 0x03, 1,1, 1, 1,1);
-    break;
-  case JCS_CMYK:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
-    cinfo->num_components = 4;
-    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
-    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
-    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
-    break;
-  case JCS_YCCK:
-    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
-    cinfo->num_components = 4;
-    SET_COMP(0, 0x01, 2,2, 0, 0,0);
-    SET_COMP(1, 0x02, 1,1, 1, 1,1);
-    SET_COMP(2, 0x03, 1,1, 1, 1,1);
-    SET_COMP(3, 0x04, 2,2, 0, 0,0);
-    break;
-  case JCS_BG_RGB:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
-    cinfo->num_components = 3;
-    /* Add offset 0x20 to the normal R/G/B component IDs */
-    SET_COMP(0, 0x72 /* 'r' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    SET_COMP(1, 0x67 /* 'g' */, 1,1, 0, 0,0);
-    SET_COMP(2, 0x62 /* 'b' */, 1,1,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
-		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    break;
-  case JCS_BG_YCC:
-    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
-    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
-    cinfo->num_components = 3;
-    /* Add offset 0x20 to the normal Cb/Cr component IDs */
-    /* We default to 2x2 subsamples of chrominance */
-    SET_COMP(0, 0x01, 2,2, 0, 0,0);
-    SET_COMP(1, 0x22, 1,1, 1, 1,1);
-    SET_COMP(2, 0x23, 1,1, 1, 1,1);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-  }
-}
-
-
-#ifdef C_PROGRESSIVE_SUPPORTED
-
-LOCAL(jpeg_scan_info *)
-fill_a_scan (jpeg_scan_info * scanptr, int ci,
-	     int Ss, int Se, int Ah, int Al)
-/* Support routine: generate one scan for specified component */
-{
-  scanptr->comps_in_scan = 1;
-  scanptr->component_index[0] = ci;
-  scanptr->Ss = Ss;
-  scanptr->Se = Se;
-  scanptr->Ah = Ah;
-  scanptr->Al = Al;
-  scanptr++;
-  return scanptr;
-}
-
-LOCAL(jpeg_scan_info *)
-fill_scans (jpeg_scan_info * scanptr, int ncomps,
-	    int Ss, int Se, int Ah, int Al)
-/* Support routine: generate one scan for each component */
-{
-  int ci;
-
-  for (ci = 0; ci < ncomps; ci++) {
-    scanptr->comps_in_scan = 1;
-    scanptr->component_index[0] = ci;
-    scanptr->Ss = Ss;
-    scanptr->Se = Se;
-    scanptr->Ah = Ah;
-    scanptr->Al = Al;
-    scanptr++;
-  }
-  return scanptr;
-}
-
-LOCAL(jpeg_scan_info *)
-fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
-/* Support routine: generate interleaved DC scan if possible, else N scans */
-{
-  int ci;
-
-  if (ncomps <= MAX_COMPS_IN_SCAN) {
-    /* Single interleaved DC scan */
-    scanptr->comps_in_scan = ncomps;
-    for (ci = 0; ci < ncomps; ci++)
-      scanptr->component_index[ci] = ci;
-    scanptr->Ss = scanptr->Se = 0;
-    scanptr->Ah = Ah;
-    scanptr->Al = Al;
-    scanptr++;
-  } else {
-    /* Noninterleaved DC scan for each component */
-    scanptr = fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
-  }
-  return scanptr;
-}
-
-
-/*
- * Create a recommended progressive-JPEG script.
- * cinfo->num_components and cinfo->jpeg_color_space must be correct.
- */
-
-GLOBAL(void)
-jpeg_simple_progression (j_compress_ptr cinfo)
-{
-  int ncomps = cinfo->num_components;
-  int nscans;
-  jpeg_scan_info * scanptr;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Figure space needed for script.  Calculation must match code below! */
-  if (ncomps == 3 &&
-      (cinfo->jpeg_color_space == JCS_YCbCr ||
-       cinfo->jpeg_color_space == JCS_BG_YCC)) {
-    /* Custom script for YCC color images. */
-    nscans = 10;
-  } else {
-    /* All-purpose script for other color spaces. */
-    if (ncomps > MAX_COMPS_IN_SCAN)
-      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
-    else
-      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
-  }
-
-  /* Allocate space for script.
-   * We need to put it in the permanent pool in case the application performs
-   * multiple compressions without changing the settings.  To avoid a memory
-   * leak if jpeg_simple_progression is called repeatedly for the same JPEG
-   * object, we try to re-use previously allocated space, and we allocate
-   * enough space to handle YCC even if initially asked for grayscale.
-   */
-  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
-    cinfo->script_space_size = MAX(nscans, 10);
-    cinfo->script_space = (jpeg_scan_info *)
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
-  }
-  scanptr = cinfo->script_space;
-  cinfo->scan_info = scanptr;
-  cinfo->num_scans = nscans;
-
-  if (ncomps == 3 &&
-      (cinfo->jpeg_color_space == JCS_YCbCr ||
-       cinfo->jpeg_color_space == JCS_BG_YCC)) {
-    /* Custom script for YCC color images. */
-    /* Initial DC scan */
-    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
-    /* Initial AC scan: get some luma data out in a hurry */
-    scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
-    /* Chroma data is too small to be worth expending many scans on */
-    scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
-    scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
-    /* Complete spectral selection for luma AC */
-    scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);
-    /* Refine next bit of luma AC */
-    scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);
-    /* Finish DC successive approximation */
-    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
-    /* Finish AC successive approximation */
-    scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
-    scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
-    /* Luma bottom bit comes last since it's usually largest scan */
-    scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
-  } else {
-    /* All-purpose script for other color spaces. */
-    /* Successive approximation first pass */
-    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
-    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
-    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
-    /* Successive approximation second pass */
-    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
-    /* Successive approximation final pass */
-    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
-    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
-  }
-}
-
-#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jcprepct.c b/cmake/externals/libjpeg/jcprepct.c
deleted file mode 100644
index 586964bd4..000000000
--- a/cmake/externals/libjpeg/jcprepct.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * jcprepct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the compression preprocessing controller.
- * This controller manages the color conversion, downsampling,
- * and edge expansion steps.
- *
- * Most of the complexity here is associated with buffering input rows
- * as required by the downsampler.  See the comments at the head of
- * jcsample.c for the downsampler's needs.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* At present, jcsample.c can request context rows only for smoothing.
- * In the future, we might also need context rows for CCIR601 sampling
- * or other more-complex downsampling procedures.  The code to support
- * context rows should be compiled only if needed.
- */
-#ifdef INPUT_SMOOTHING_SUPPORTED
-#define CONTEXT_ROWS_SUPPORTED
-#endif
-
-
-/*
- * For the simple (no-context-row) case, we just need to buffer one
- * row group's worth of pixels for the downsampling step.  At the bottom of
- * the image, we pad to a full row group by replicating the last pixel row.
- * The downsampler's last output row is then replicated if needed to pad
- * out to a full iMCU row.
- *
- * When providing context rows, we must buffer three row groups' worth of
- * pixels.  Three row groups are physically allocated, but the row pointer
- * arrays are made five row groups high, with the extra pointers above and
- * below "wrapping around" to point to the last and first real row groups.
- * This allows the downsampler to access the proper context rows.
- * At the top and bottom of the image, we create dummy context rows by
- * copying the first or last real pixel row.  This copying could be avoided
- * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
- * trouble on the compression side.
- */
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_prep_controller pub; /* public fields */
-
-  /* Downsampling input buffer.  This buffer holds color-converted data
-   * until we have enough to do a downsample step.
-   */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
-
-  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
-  int next_buf_row;		/* index of next row to store in color_buf */
-
-#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
-  int this_row_group;		/* starting row index of group to process */
-  int next_buf_stop;		/* downsample when we reach this index */
-#endif
-} my_prep_controller;
-
-typedef my_prep_controller * my_prep_ptr;
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-
-  if (pass_mode != JBUF_PASS_THRU)
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  /* Initialize total-height counter for detecting bottom of image */
-  prep->rows_to_go = cinfo->image_height;
-  /* Mark the conversion buffer empty */
-  prep->next_buf_row = 0;
-#ifdef CONTEXT_ROWS_SUPPORTED
-  /* Preset additional state variables for context mode.
-   * These aren't used in non-context mode, so we needn't test which mode.
-   */
-  prep->this_row_group = 0;
-  /* Set next_buf_stop to stop after two row groups have been read in. */
-  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
-#endif
-}
-
-
-/*
- * Expand an image vertically from height input_rows to height output_rows,
- * by duplicating the bottom row.
- */
-
-LOCAL(void)
-expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
-		    int input_rows, int output_rows)
-{
-  register int row;
-
-  for (row = input_rows; row < output_rows; row++) {
-    jcopy_sample_rows(image_data + input_rows - 1,
-		      image_data + row,
-		      1, num_cols);
-  }
-}
-
-
-/*
- * Process some data in the simple no-context case.
- *
- * Preprocessor output data is counted in "row groups".  A row group
- * is defined to be v_samp_factor sample rows of each component.
- * Downsampling will produce this much data from each max_v_samp_factor
- * input rows.
- */
-
-METHODDEF(void)
-pre_process_data (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		  JDIMENSION in_rows_avail,
-		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		  JDIMENSION out_row_groups_avail)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int numrows, ci;
-  JDIMENSION inrows;
-  jpeg_component_info * compptr;
-
-  while (*in_row_ctr < in_rows_avail &&
-	 *out_row_group_ctr < out_row_groups_avail) {
-    /* Do color conversion to fill the conversion buffer. */
-    inrows = in_rows_avail - *in_row_ctr;
-    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
-    numrows = (int) MIN((JDIMENSION) numrows, inrows);
-    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-				       prep->color_buf,
-				       (JDIMENSION) prep->next_buf_row,
-				       numrows);
-    *in_row_ctr += numrows;
-    prep->next_buf_row += numrows;
-    prep->rows_to_go -= numrows;
-    /* If at bottom of image, pad to fill the conversion buffer. */
-    if (prep->rows_to_go == 0 &&
-	prep->next_buf_row < cinfo->max_v_samp_factor) {
-      for (ci = 0; ci < cinfo->num_components; ci++) {
-	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			   prep->next_buf_row, cinfo->max_v_samp_factor);
-      }
-      prep->next_buf_row = cinfo->max_v_samp_factor;
-    }
-    /* If we've filled the conversion buffer, empty it. */
-    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
-      (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf, (JDIMENSION) 0,
-					output_buf, *out_row_group_ctr);
-      prep->next_buf_row = 0;
-      (*out_row_group_ctr)++;
-    }
-    /* If at bottom of image, pad the output to a full iMCU height.
-     * Note we assume the caller is providing a one-iMCU-height output buffer!
-     */
-    if (prep->rows_to_go == 0 &&
-	*out_row_group_ctr < out_row_groups_avail) {
-      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	numrows = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		  cinfo->min_DCT_v_scaled_size;
-	expand_bottom_edge(output_buf[ci],
-			   compptr->width_in_blocks * compptr->DCT_h_scaled_size,
-			   (int) (*out_row_group_ctr * numrows),
-			   (int) (out_row_groups_avail * numrows));
-      }
-      *out_row_group_ctr = out_row_groups_avail;
-      break;			/* can exit outer loop without test */
-    }
-  }
-}
-
-
-#ifdef CONTEXT_ROWS_SUPPORTED
-
-/*
- * Process some data in the context case.
- */
-
-METHODDEF(void)
-pre_process_context (j_compress_ptr cinfo,
-		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		     JDIMENSION in_rows_avail,
-		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		     JDIMENSION out_row_groups_avail)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int numrows, ci;
-  int buf_height = cinfo->max_v_samp_factor * 3;
-  JDIMENSION inrows;
-
-  while (*out_row_group_ctr < out_row_groups_avail) {
-    if (*in_row_ctr < in_rows_avail) {
-      /* Do color conversion to fill the conversion buffer. */
-      inrows = in_rows_avail - *in_row_ctr;
-      numrows = prep->next_buf_stop - prep->next_buf_row;
-      numrows = (int) MIN((JDIMENSION) numrows, inrows);
-      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-					 prep->color_buf,
-					 (JDIMENSION) prep->next_buf_row,
-					 numrows);
-      /* Pad at top of image, if first time through */
-      if (prep->rows_to_go == cinfo->image_height) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  int row;
-	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
-	    jcopy_sample_rows(prep->color_buf[ci],
-			      prep->color_buf[ci] - row,
-			      1, cinfo->image_width);
-	  }
-	}
-      }
-      *in_row_ctr += numrows;
-      prep->next_buf_row += numrows;
-      prep->rows_to_go -= numrows;
-    } else {
-      /* Return for more data, unless we are at the bottom of the image. */
-      if (prep->rows_to_go != 0)
-	break;
-      /* When at bottom of image, pad to fill the conversion buffer. */
-      if (prep->next_buf_row < prep->next_buf_stop) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			     prep->next_buf_row, prep->next_buf_stop);
-	}
-	prep->next_buf_row = prep->next_buf_stop;
-      }
-    }
-    /* If we've gotten enough data, downsample a row group. */
-    if (prep->next_buf_row == prep->next_buf_stop) {
-      (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf,
-					(JDIMENSION) prep->this_row_group,
-					output_buf, *out_row_group_ctr);
-      (*out_row_group_ctr)++;
-      /* Advance pointers with wraparound as necessary. */
-      prep->this_row_group += cinfo->max_v_samp_factor;
-      if (prep->this_row_group >= buf_height)
-	prep->this_row_group = 0;
-      if (prep->next_buf_row >= buf_height)
-	prep->next_buf_row = 0;
-      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
-    }
-  }
-}
-
-
-/*
- * Create the wrapped-around downsampling input buffer needed for context mode.
- */
-
-LOCAL(void)
-create_context_buffer (j_compress_ptr cinfo)
-{
-  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
-  int rgroup_height = cinfo->max_v_samp_factor;
-  int ci, i;
-  jpeg_component_info * compptr;
-  JSAMPARRAY true_buffer, fake_buffer;
-
-  /* Grab enough space for fake row pointers for all the components;
-   * we need five row groups' worth of pointers for each component.
-   */
-  fake_buffer = (JSAMPARRAY) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (cinfo->num_components * 5 * rgroup_height) * SIZEOF(JSAMPROW));
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate the actual buffer space (3 row groups) for this component.
-     * We make the buffer wide enough to allow the downsampler to edge-expand
-     * horizontally within the buffer, if it so chooses.
-     */
-    true_buffer = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (JDIMENSION) (((long) compptr->width_in_blocks *
-		      cinfo->min_DCT_h_scaled_size *
-		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-       (JDIMENSION) (3 * rgroup_height));
-    /* Copy true buffer row pointers into the middle of the fake row array */
-    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
-	    3 * rgroup_height * SIZEOF(JSAMPROW));
-    /* Fill in the above and below wraparound pointers */
-    for (i = 0; i < rgroup_height; i++) {
-      fake_buffer[i] = true_buffer[2 * rgroup_height + i];
-      fake_buffer[4 * rgroup_height + i] = true_buffer[i];
-    }
-    prep->color_buf[ci] = fake_buffer + rgroup_height;
-    fake_buffer += 5 * rgroup_height; /* point to space for next component */
-  }
-}
-
-#endif /* CONTEXT_ROWS_SUPPORTED */
-
-
-/*
- * Initialize preprocessing controller.
- */
-
-GLOBAL(void)
-jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
-{
-  my_prep_ptr prep;
-  int ci;
-  jpeg_component_info * compptr;
-
-  if (need_full_buffer)		/* safety check */
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  prep = (my_prep_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_prep_controller));
-  cinfo->prep = &prep->pub;
-  prep->pub.start_pass = start_pass_prep;
-
-  /* Allocate the color conversion buffer.
-   * We make the buffer wide enough to allow the downsampler to edge-expand
-   * horizontally within the buffer, if it so chooses.
-   */
-  if (cinfo->downsample->need_context_rows) {
-    /* Set up to provide context rows */
-#ifdef CONTEXT_ROWS_SUPPORTED
-    prep->pub.pre_process_data = pre_process_context;
-    create_context_buffer(cinfo);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    /* No context, just make it tall enough for one row group */
-    prep->pub.pre_process_data = pre_process_data;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) (((long) compptr->width_in_blocks *
-			cinfo->min_DCT_h_scaled_size *
-			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
-    }
-  }
-}
diff --git a/cmake/externals/libjpeg/jcsample.c b/cmake/externals/libjpeg/jcsample.c
deleted file mode 100644
index 2372c4173..000000000
--- a/cmake/externals/libjpeg/jcsample.c
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * jcsample.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2003-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains downsampling routines.
- *
- * Downsampling input data is counted in "row groups".  A row group
- * is defined to be max_v_samp_factor pixel rows of each component,
- * from which the downsampler produces v_samp_factor sample rows.
- * A single row group is processed in each call to the downsampler module.
- *
- * The downsampler is responsible for edge-expansion of its output data
- * to fill an integral number of DCT blocks horizontally.  The source buffer
- * may be modified if it is helpful for this purpose (the source buffer is
- * allocated wide enough to correspond to the desired output width).
- * The caller (the prep controller) is responsible for vertical padding.
- *
- * The downsampler may request "context rows" by setting need_context_rows
- * during startup.  In this case, the input arrays will contain at least
- * one row group's worth of pixels above and below the passed-in data;
- * the caller will create dummy rows at image top and bottom by replicating
- * the first or last real pixel row.
- *
- * An excellent reference for image resampling is
- *   Digital Image Warping, George Wolberg, 1990.
- *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
- *
- * The downsampling algorithm used here is a simple average of the source
- * pixels covered by the output pixel.  The hi-falutin sampling literature
- * refers to this as a "box filter".  In general the characteristics of a box
- * filter are not very good, but for the specific cases we normally use (1:1
- * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
- * nearly so bad.  If you intend to use other sampling ratios, you'd be well
- * advised to improve this code.
- *
- * A simple input-smoothing capability is provided.  This is mainly intended
- * for cleaning up color-dithered GIF input files (if you find it inadequate,
- * we suggest using an external filtering program such as pnmconvol).  When
- * enabled, each input pixel P is replaced by a weighted sum of itself and its
- * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
- * where SF = (smoothing_factor / 1024).
- * Currently, smoothing is only supported for 2h2v sampling factors.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Pointer to routine to downsample a single component */
-typedef JMETHOD(void, downsample1_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data));
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_downsampler pub;	/* public fields */
-
-  /* Downsampling method pointers, one per component */
-  downsample1_ptr methods[MAX_COMPONENTS];
-
-  /* Height of an output row group for each component. */
-  int rowgroup_height[MAX_COMPONENTS];
-
-  /* These arrays save pixel expansion factors so that int_downsample need not
-   * recompute them each time.  They are unused for other downsampling methods.
-   */
-  UINT8 h_expand[MAX_COMPONENTS];
-  UINT8 v_expand[MAX_COMPONENTS];
-} my_downsampler;
-
-typedef my_downsampler * my_downsample_ptr;
-
-
-/*
- * Initialize for a downsampling pass.
- */
-
-METHODDEF(void)
-start_pass_downsample (j_compress_ptr cinfo)
-{
-  /* no work for now */
-}
-
-
-/*
- * Expand a component horizontally from width input_cols to width output_cols,
- * by duplicating the rightmost samples.
- */
-
-LOCAL(void)
-expand_right_edge (JSAMPARRAY image_data, int num_rows,
-		   JDIMENSION input_cols, JDIMENSION output_cols)
-{
-  register JSAMPROW ptr;
-  register JSAMPLE pixval;
-  register int count;
-  int row;
-  int numcols = (int) (output_cols - input_cols);
-
-  if (numcols > 0) {
-    for (row = 0; row < num_rows; row++) {
-      ptr = image_data[row] + input_cols;
-      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
-      for (count = numcols; count > 0; count--)
-	*ptr++ = pixval;
-    }
-  }
-}
-
-
-/*
- * Do downsampling for a whole row group (all components).
- *
- * In this version we simply downsample each component independently.
- */
-
-METHODDEF(void)
-sep_downsample (j_compress_ptr cinfo,
-		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
-{
-  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
-  int ci;
-  jpeg_component_info * compptr;
-  JSAMPARRAY in_ptr, out_ptr;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    in_ptr = input_buf[ci] + in_row_index;
-    out_ptr = output_buf[ci] +
-	      (out_row_group_index * downsample->rowgroup_height[ci]);
-    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * One row group is processed per call.
- * This version handles arbitrary integral sampling ratios, without smoothing.
- * Note that this version is not actually used for customary sampling ratios.
- */
-
-METHODDEF(void)
-int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
-  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
-  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  JSAMPROW inptr, outptr;
-  INT32 outvalue;
-
-  h_expand = downsample->h_expand[compptr->component_index];
-  v_expand = downsample->v_expand[compptr->component_index];
-  numpix = h_expand * v_expand;
-  numpix2 = numpix/2;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * h_expand);
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    for (outcol = 0, outcol_h = 0; outcol < output_cols;
-	 outcol++, outcol_h += h_expand) {
-      outvalue = 0;
-      for (v = 0; v < v_expand; v++) {
-	inptr = input_data[inrow+v] + outcol_h;
-	for (h = 0; h < h_expand; h++) {
-	  outvalue += (INT32) GETJSAMPLE(*inptr++);
-	}
-      }
-      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
-    }
-    inrow += v_expand;
-    outrow++;
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the special case of a full-size component,
- * without smoothing.
- */
-
-METHODDEF(void)
-fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  /* Copy the data */
-  jcopy_sample_rows(input_data, output_data,
-		    cinfo->max_v_samp_factor, cinfo->image_width);
-  /* Edge-expand */
-  expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
-		    compptr->width_in_blocks * compptr->DCT_h_scaled_size);
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the common case of 2:1 horizontal and 1:1 vertical,
- * without smoothing.
- *
- * A note about the "bias" calculations: when rounding fractional values to
- * integer, we do not want to always round 0.5 up to the next integer.
- * If we did that, we'd introduce a noticeable bias towards larger values.
- * Instead, this code is arranged so that 0.5 will be rounded up or down at
- * alternate pixel locations (a simple ordered dither pattern).
- */
-
-METHODDEF(void)
-h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow;
-  JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr, outptr;
-  register int bias;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
-
-  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
-    outptr = output_data[inrow];
-    inptr = input_data[inrow];
-    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
-    for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
-			      + bias) >> 1);
-      bias ^= 1;		/* 0=>1, 1=>0 */
-      inptr += 2;
-    }
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
- * without smoothing.
- */
-
-METHODDEF(void)
-h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow, outrow;
-  JDIMENSION outcol;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr0, inptr1, outptr;
-  register int bias;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    inptr0 = input_data[inrow];
-    inptr1 = input_data[inrow+1];
-    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
-    for (outcol = 0; outcol < output_cols; outcol++) {
-      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
-			      + bias) >> 2);
-      bias ^= 3;		/* 1=>2, 2=>1 */
-      inptr0 += 2; inptr1 += 2;
-    }
-    inrow += 2;
-    outrow++;
-  }
-}
-
-
-#ifdef INPUT_SMOOTHING_SUPPORTED
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
- * with smoothing.  One row of context is required.
- */
-
-METHODDEF(void)
-h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-			JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow, outrow;
-  JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols * 2);
-
-  /* We don't bother to form the individual "smoothed" input pixel values;
-   * we can directly compute the output which is the average of the four
-   * smoothed values.  Each of the four member pixels contributes a fraction
-   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
-   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
-   * output.  The four corner-adjacent neighbor pixels contribute a fraction
-   * SF to just one smoothed pixel, or SF/4 to the final output; while the
-   * eight edge-adjacent neighbors contribute SF to each of two smoothed
-   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
-   * factors are scaled by 2^16 = 65536.
-   * Also recall that SF = smoothing_factor / 1024.
-   */
-
-  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
-  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
-
-  inrow = outrow = 0;
-  while (inrow < cinfo->max_v_samp_factor) {
-    outptr = output_data[outrow];
-    inptr0 = input_data[inrow];
-    inptr1 = input_data[inrow+1];
-    above_ptr = input_data[inrow-1];
-    below_ptr = input_data[inrow+2];
-
-    /* Special case for first column: pretend column -1 is same as column 0 */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
-	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
-    neighsum += neighsum;
-    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
-		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
-
-    for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      /* sum of pixels directly mapped to this output element */
-      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-      /* sum of edge-neighbor pixels */
-      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
-		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
-      /* The edge-neighbors count twice as much as corner-neighbors */
-      neighsum += neighsum;
-      /* Add in the corner-neighbors */
-      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
-		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
-      /* form final output scaled up by 2^16 */
-      membersum = membersum * memberscale + neighsum * neighscale;
-      /* round, descale and output it */
-      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
-    }
-
-    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
-    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
-	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
-    neighsum += neighsum;
-    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
-		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
-
-    inrow += 2;
-    outrow++;
-  }
-}
-
-
-/*
- * Downsample pixel values of a single component.
- * This version handles the special case of a full-size component,
- * with smoothing.  One row of context is required.
- */
-
-METHODDEF(void)
-fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
-			    JSAMPARRAY input_data, JSAMPARRAY output_data)
-{
-  int inrow;
-  JDIMENSION colctr;
-  JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
-  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
-  int colsum, lastcolsum, nextcolsum;
-
-  /* Expand input data enough to let all the output samples be generated
-   * by the standard loop.  Special-casing padded output would be more
-   * efficient.
-   */
-  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols);
-
-  /* Each of the eight neighbor pixels contributes a fraction SF to the
-   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
-   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
-   * Also recall that SF = smoothing_factor / 1024.
-   */
-
-  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
-  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
-
-  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
-    outptr = output_data[inrow];
-    inptr = input_data[inrow];
-    above_ptr = input_data[inrow-1];
-    below_ptr = input_data[inrow+1];
-
-    /* Special case for first column */
-    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
-	     GETJSAMPLE(*inptr);
-    membersum = GETJSAMPLE(*inptr++);
-    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		 GETJSAMPLE(*inptr);
-    neighsum = colsum + (colsum - membersum) + nextcolsum;
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-    lastcolsum = colsum; colsum = nextcolsum;
-
-    for (colctr = output_cols - 2; colctr > 0; colctr--) {
-      membersum = GETJSAMPLE(*inptr++);
-      above_ptr++; below_ptr++;
-      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		   GETJSAMPLE(*inptr);
-      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
-      membersum = membersum * memberscale + neighsum * neighscale;
-      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
-      lastcolsum = colsum; colsum = nextcolsum;
-    }
-
-    /* Special case for last column */
-    membersum = GETJSAMPLE(*inptr);
-    neighsum = lastcolsum + (colsum - membersum) + colsum;
-    membersum = membersum * memberscale + neighsum * neighscale;
-    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
-
-  }
-}
-
-#endif /* INPUT_SMOOTHING_SUPPORTED */
-
-
-/*
- * Module initialization routine for downsampling.
- * Note that we must select a routine for each component.
- */
-
-GLOBAL(void)
-jinit_downsampler (j_compress_ptr cinfo)
-{
-  my_downsample_ptr downsample;
-  int ci;
-  jpeg_component_info * compptr;
-  boolean smoothok = TRUE;
-  int h_in_group, v_in_group, h_out_group, v_out_group;
-
-  downsample = (my_downsample_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_downsampler));
-  cinfo->downsample = &downsample->pub;
-  downsample->pub.start_pass = start_pass_downsample;
-  downsample->pub.downsample = sep_downsample;
-  downsample->pub.need_context_rows = FALSE;
-
-  if (cinfo->CCIR601_sampling)
-    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
-
-  /* Verify we can handle the sampling factors, and set up method pointers */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Compute size of an "output group" for DCT scaling.  This many samples
-     * are to be converted from max_h_samp_factor * max_v_samp_factor pixels.
-     */
-    h_out_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
-		  cinfo->min_DCT_h_scaled_size;
-    v_out_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		  cinfo->min_DCT_v_scaled_size;
-    h_in_group = cinfo->max_h_samp_factor;
-    v_in_group = cinfo->max_v_samp_factor;
-    downsample->rowgroup_height[ci] = v_out_group; /* save for use later */
-    if (h_in_group == h_out_group && v_in_group == v_out_group) {
-#ifdef INPUT_SMOOTHING_SUPPORTED
-      if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = fullsize_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
-      } else
-#endif
-	downsample->methods[ci] = fullsize_downsample;
-    } else if (h_in_group == h_out_group * 2 &&
-	       v_in_group == v_out_group) {
-      smoothok = FALSE;
-      downsample->methods[ci] = h2v1_downsample;
-    } else if (h_in_group == h_out_group * 2 &&
-	       v_in_group == v_out_group * 2) {
-#ifdef INPUT_SMOOTHING_SUPPORTED
-      if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = h2v2_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
-      } else
-#endif
-	downsample->methods[ci] = h2v2_downsample;
-    } else if ((h_in_group % h_out_group) == 0 &&
-	       (v_in_group % v_out_group) == 0) {
-      smoothok = FALSE;
-      downsample->methods[ci] = int_downsample;
-      downsample->h_expand[ci] = (UINT8) (h_in_group / h_out_group);
-      downsample->v_expand[ci] = (UINT8) (v_in_group / v_out_group);
-    } else
-      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
-  }
-
-#ifdef INPUT_SMOOTHING_SUPPORTED
-  if (cinfo->smoothing_factor && !smoothok)
-    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
-#endif
-}
diff --git a/cmake/externals/libjpeg/jctrans.c b/cmake/externals/libjpeg/jctrans.c
deleted file mode 100644
index 261dd2996..000000000
--- a/cmake/externals/libjpeg/jctrans.c
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * jctrans.c
- *
- * Copyright (C) 1995-1998, Thomas G. Lane.
- * Modified 2000-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains library routines for transcoding compression,
- * that is, writing raw DCT coefficient arrays to an output JPEG file.
- * The routines in jcapimin.c will also be needed by a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(void) transencode_master_selection
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
-LOCAL(void) transencode_coef_controller
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
-
-
-/*
- * Compression initialization for writing raw-coefficient data.
- * Before calling this, all parameters and a data destination must be set up.
- * Call jpeg_finish_compress() to actually write the data.
- *
- * The number of passed virtual arrays must match cinfo->num_components.
- * Note that the virtual arrays need not be filled or even realized at
- * the time write_coefficients is called; indeed, if the virtual arrays
- * were requested from this compression object's memory manager, they
- * typically will be realized during this routine and filled afterwards.
- */
-
-GLOBAL(void)
-jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
-{
-  if (cinfo->global_state != CSTATE_START)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Mark all tables to be written */
-  jpeg_suppress_tables(cinfo, FALSE);
-  /* (Re)initialize error mgr and destination modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->dest->init_destination) (cinfo);
-  /* Perform master selection of active modules */
-  transencode_master_selection(cinfo, coef_arrays);
-  /* Wait for jpeg_finish_compress() call */
-  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
-  cinfo->global_state = CSTATE_WRCOEFS;
-}
-
-
-/*
- * Initialize the compression object with default parameters,
- * then copy from the source object all parameters needed for lossless
- * transcoding.  Parameters that can be varied without loss (such as
- * scan script and Huffman optimization) are left in their default states.
- */
-
-GLOBAL(void)
-jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
-			       j_compress_ptr dstinfo)
-{
-  JQUANT_TBL ** qtblptr;
-  jpeg_component_info *incomp, *outcomp;
-  JQUANT_TBL *c_quant, *slot_quant;
-  int tblno, ci, coefi;
-
-  /* Safety check to ensure start_compress not called yet. */
-  if (dstinfo->global_state != CSTATE_START)
-    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
-  /* Copy fundamental image dimensions */
-  dstinfo->image_width = srcinfo->image_width;
-  dstinfo->image_height = srcinfo->image_height;
-  dstinfo->input_components = srcinfo->num_components;
-  dstinfo->in_color_space = srcinfo->jpeg_color_space;
-  dstinfo->jpeg_width = srcinfo->output_width;
-  dstinfo->jpeg_height = srcinfo->output_height;
-  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
-  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
-  /* Initialize all parameters to default values */
-  jpeg_set_defaults(dstinfo);
-  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
-   * Fix it to get the right header markers for the image colorspace.
-   * Note: Entropy table assignment in jpeg_set_colorspace
-   * depends on color_transform.
-   * Adaption is also required for setting the appropriate
-   * entropy coding mode dependent on image data precision.
-   */
-  dstinfo->color_transform = srcinfo->color_transform;
-  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
-  dstinfo->data_precision = srcinfo->data_precision;
-  dstinfo->arith_code = srcinfo->data_precision > 8 ? TRUE : FALSE;
-  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
-  /* Copy the source's quantization tables. */
-  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
-    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
-      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
-      if (*qtblptr == NULL)
-	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
-      MEMCOPY((*qtblptr)->quantval,
-	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
-	      SIZEOF((*qtblptr)->quantval));
-      (*qtblptr)->sent_table = FALSE;
-    }
-  }
-  /* Copy the source's per-component info.
-   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
-   */
-  dstinfo->num_components = srcinfo->num_components;
-  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
-	     MAX_COMPONENTS);
-  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
-       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
-    outcomp->component_id = incomp->component_id;
-    outcomp->h_samp_factor = incomp->h_samp_factor;
-    outcomp->v_samp_factor = incomp->v_samp_factor;
-    outcomp->quant_tbl_no = incomp->quant_tbl_no;
-    /* Make sure saved quantization table for component matches the qtable
-     * slot.  If not, the input file re-used this qtable slot.
-     * IJG encoder currently cannot duplicate this.
-     */
-    tblno = outcomp->quant_tbl_no;
-    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
-	srcinfo->quant_tbl_ptrs[tblno] == NULL)
-      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
-    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
-    c_quant = incomp->quant_table;
-    if (c_quant != NULL) {
-      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
-	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
-	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
-      }
-    }
-    /* Note: we do not copy the source's entropy table assignments;
-     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
-     */
-  }
-  /* Also copy JFIF version and resolution information, if available.
-   * Strictly speaking this isn't "critical" info, but it's nearly
-   * always appropriate to copy it if available.  In particular,
-   * if the application chooses to copy JFIF 1.02 extension markers from
-   * the source file, we need to copy the version to make sure we don't
-   * emit a file that has 1.02 extensions but a claimed version of 1.01.
-   */
-  if (srcinfo->saw_JFIF_marker) {
-    if (srcinfo->JFIF_major_version == 1 ||
-	srcinfo->JFIF_major_version == 2) {
-      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
-      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
-    }
-    dstinfo->density_unit = srcinfo->density_unit;
-    dstinfo->X_density = srcinfo->X_density;
-    dstinfo->Y_density = srcinfo->Y_density;
-  }
-}
-
-
-LOCAL(void)
-jpeg_calc_trans_dimensions (j_compress_ptr cinfo)
-/* Do computations that are needed before master selection phase */
-{
-  if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size)
-    ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	     cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size);
-
-  cinfo->block_size = cinfo->min_DCT_h_scaled_size;
-}
-
-
-/*
- * Master selection of compression modules for transcoding.
- * This substitutes for jcinit.c's initialization of the full compressor.
- */
-
-LOCAL(void)
-transencode_master_selection (j_compress_ptr cinfo,
-			      jvirt_barray_ptr * coef_arrays)
-{
-  /* Do computations that are needed before master selection phase */
-  jpeg_calc_trans_dimensions(cinfo);
-
-  /* Initialize master control (includes parameter checking/processing) */
-  jinit_c_master_control(cinfo, TRUE /* transcode only */);
-
-  /* Entropy encoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_encoder(cinfo);
-  else {
-    jinit_huff_encoder(cinfo);
-  }
-
-  /* We need a special coefficient buffer controller. */
-  transencode_coef_controller(cinfo, coef_arrays);
-
-  jinit_marker_writer(cinfo);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Write the datastream header (SOI, JFIF) immediately.
-   * Frame and scan headers are postponed till later.
-   * This lets application insert special markers after the SOI.
-   */
-  (*cinfo->marker->write_file_header) (cinfo);
-}
-
-
-/*
- * The rest of this file is a special implementation of the coefficient
- * buffer controller.  This is similar to jccoefct.c, but it handles only
- * output from presupplied virtual arrays.  Furthermore, we generate any
- * dummy padding blocks on-the-fly rather than expecting them to be present
- * in the arrays.
- */
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_c_coef_controller pub; /* public fields */
-
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* Virtual block array for each component. */
-  jvirt_barray_ptr * whole_image;
-
-  /* Workspace for constructing dummy blocks at right/bottom edges. */
-  JBLOCK dummy_buffer[C_MAX_BLOCKS_IN_MCU];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-LOCAL(void)
-start_iMCU_row (j_compress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->MCU_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  if (pass_mode != JBUF_CRANK_DEST)
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  coef->iMCU_row_num = 0;
-  start_iMCU_row(cinfo);
-}
-
-
-/*
- * Process some data.
- * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
- * per call, ie, v_samp_factor block rows for each component in the scan.
- * The data is obtained from the virtual arrays and fed to the entropy coder.
- * Returns TRUE if the iMCU row is completed, FALSE if suspended.
- *
- * NB: input_buf is ignored; it is likely to be a NULL pointer.
- */
-
-METHODDEF(boolean)
-compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int blkn, ci, xindex, yindex, yoffset, blockcnt;
-  JDIMENSION start_col;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan. */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       coef->iMCU_row_num * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yoffset + yindex < compptr->last_row_height) {
-	    /* Fill in pointers to real blocks in this row */
-	    buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
-	    xindex = blockcnt;
-	    do {
-	      MCU_buffer[blkn++] = buffer_ptr++;
-	    } while (--xindex);
-	    /* Dummy blocks at right edge */
-	    if ((xindex = compptr->MCU_width - blockcnt) == 0)
-	      continue;
-	  } else {
-	    /* At bottom of image, need a whole row of dummy blocks */
-	    xindex = compptr->MCU_width;
-	  }
-	  /* Fill in any dummy blocks needed in this row.
-	   * Dummy blocks are filled in the same way as in jccoefct.c:
-	   * all zeroes in the AC entries, DC entries equal to previous
-	   * block's DC value.  The init routine has already zeroed the
-	   * AC entries, so we need only set the DC entries correctly.
-	   */
-	  buffer_ptr = coef->dummy_buffer + blkn;
-	  do {
-	    buffer_ptr[0][0] = MCU_buffer[blkn-1][0][0];
-	    MCU_buffer[blkn++] = buffer_ptr++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to write the MCU. */
-      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return FALSE;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  coef->iMCU_row_num++;
-  start_iMCU_row(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Initialize coefficient buffer controller.
- *
- * Each passed coefficient array must be the right size for that
- * coefficient: width_in_blocks wide and height_in_blocks high,
- * with unitheight at least v_samp_factor.
- */
-
-LOCAL(void)
-transencode_coef_controller (j_compress_ptr cinfo,
-			     jvirt_barray_ptr * coef_arrays)
-{
-  my_coef_ptr coef;
-
-  coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
-  cinfo->coef = &coef->pub;
-  coef->pub.start_pass = start_pass_coef;
-  coef->pub.compress_data = compress_output;
-
-  /* Save pointer to virtual arrays */
-  coef->whole_image = coef_arrays;
-
-  /* Pre-zero space for dummy DCT blocks */
-  MEMZERO(coef->dummy_buffer, SIZEOF(coef->dummy_buffer));
-}
diff --git a/cmake/externals/libjpeg/jdapimin.c b/cmake/externals/libjpeg/jdapimin.c
deleted file mode 100644
index 785e52722..000000000
--- a/cmake/externals/libjpeg/jdapimin.c
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * jdapimin.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2009-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the decompression half
- * of the JPEG library.  These are the "minimum" API routines that may be
- * needed in either the normal full-decompression case or the
- * transcoding-only case.
- *
- * Most of the routines intended to be called directly by an application
- * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
- * shared by compression and decompression, and jdtrans.c for the transcoding
- * case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * Initialization of a JPEG decompression object.
- * The error manager must already be set up (in case memory manager fails).
- */
-
-GLOBAL(void)
-jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
-{
-  int i;
-
-  /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
-  if (version != JPEG_LIB_VERSION)
-    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_decompress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
-
-  /* For debugging purposes, we zero the whole master structure.
-   * But the application has already set the err pointer, and may have set
-   * client_data, so we have to save and restore those fields.
-   * Note: if application hasn't set client_data, tools like Purify may
-   * complain here.
-   */
-  {
-    struct jpeg_error_mgr * err = cinfo->err;
-    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
-    cinfo->err = err;
-    cinfo->client_data = client_data;
-  }
-  cinfo->is_decompressor = TRUE;
-
-  /* Initialize a memory manager instance for this object */
-  jinit_memory_mgr((j_common_ptr) cinfo);
-
-  /* Zero out pointers to permanent structures. */
-  cinfo->progress = NULL;
-  cinfo->src = NULL;
-
-  for (i = 0; i < NUM_QUANT_TBLS; i++)
-    cinfo->quant_tbl_ptrs[i] = NULL;
-
-  for (i = 0; i < NUM_HUFF_TBLS; i++) {
-    cinfo->dc_huff_tbl_ptrs[i] = NULL;
-    cinfo->ac_huff_tbl_ptrs[i] = NULL;
-  }
-
-  /* Initialize marker processor so application can override methods
-   * for COM, APPn markers before calling jpeg_read_header.
-   */
-  cinfo->marker_list = NULL;
-  jinit_marker_reader(cinfo);
-
-  /* And initialize the overall input controller. */
-  jinit_input_controller(cinfo);
-
-  /* OK, I'm ready */
-  cinfo->global_state = DSTATE_START;
-}
-
-
-/*
- * Destruction of a JPEG decompression object
- */
-
-GLOBAL(void)
-jpeg_destroy_decompress (j_decompress_ptr cinfo)
-{
-  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Abort processing of a JPEG decompression operation,
- * but don't destroy the object itself.
- */
-
-GLOBAL(void)
-jpeg_abort_decompress (j_decompress_ptr cinfo)
-{
-  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
-}
-
-
-/*
- * Set default decompression parameters.
- */
-
-LOCAL(void)
-default_decompress_parms (j_decompress_ptr cinfo)
-{
-  int cid0, cid1, cid2, cid3;
-
-  /* Guess the input colorspace, and set output colorspace accordingly. */
-  /* Note application may override our guesses. */
-  switch (cinfo->num_components) {
-  case 1:
-    cinfo->jpeg_color_space = JCS_GRAYSCALE;
-    cinfo->out_color_space = JCS_GRAYSCALE;
-    break;
-
-  case 3:
-    cid0 = cinfo->comp_info[0].component_id;
-    cid1 = cinfo->comp_info[1].component_id;
-    cid2 = cinfo->comp_info[2].component_id;
-
-    /* For robust detection of standard colorspaces
-     * regardless of the presence of special markers,
-     * check component IDs from SOF marker first.
-     */
-    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03)
-      cinfo->jpeg_color_space = JCS_YCbCr;
-    else if (cid0 == 0x01 && cid1 == 0x22 && cid2 == 0x23)
-      cinfo->jpeg_color_space = JCS_BG_YCC;
-    else if (cid0 == 0x52 && cid1 == 0x47 && cid2 == 0x42)
-      cinfo->jpeg_color_space = JCS_RGB;	/* ASCII 'R', 'G', 'B' */
-    else if (cid0 == 0x72 && cid1 == 0x67 && cid2 == 0x62)
-      cinfo->jpeg_color_space = JCS_BG_RGB;	/* ASCII 'r', 'g', 'b' */
-    else if (cinfo->saw_JFIF_marker)
-      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
-    else if (cinfo->saw_Adobe_marker) {
-      switch (cinfo->Adobe_transform) {
-      case 0:
-	cinfo->jpeg_color_space = JCS_RGB;
-	break;
-      case 1:
-	cinfo->jpeg_color_space = JCS_YCbCr;
-	break;
-      default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
-      }
-    } else {
-      TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
-    }
-    /* Always guess RGB is proper output colorspace. */
-    cinfo->out_color_space = JCS_RGB;
-    break;
-
-  case 4:
-    cid0 = cinfo->comp_info[0].component_id;
-    cid1 = cinfo->comp_info[1].component_id;
-    cid2 = cinfo->comp_info[2].component_id;
-    cid3 = cinfo->comp_info[3].component_id;
-
-    /* For robust detection of standard colorspaces
-     * regardless of the presence of special markers,
-     * check component IDs from SOF marker first.
-     */
-    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03 && cid3 == 0x04)
-      cinfo->jpeg_color_space = JCS_YCCK;
-    else if (cid0 == 0x43 && cid1 == 0x4D && cid2 == 0x59 && cid3 == 0x4B)
-      cinfo->jpeg_color_space = JCS_CMYK;   /* ASCII 'C', 'M', 'Y', 'K' */
-    else if (cinfo->saw_Adobe_marker) {
-      switch (cinfo->Adobe_transform) {
-      case 0:
-	cinfo->jpeg_color_space = JCS_CMYK;
-	break;
-      case 2:
-	cinfo->jpeg_color_space = JCS_YCCK;
-	break;
-      default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCCK;	/* assume it's YCCK */
-      }
-    } else {
-      /* Unknown IDs and no special markers, assume straight CMYK. */
-      cinfo->jpeg_color_space = JCS_CMYK;
-    }
-    cinfo->out_color_space = JCS_CMYK;
-    break;
-
-  default:
-    cinfo->jpeg_color_space = JCS_UNKNOWN;
-    cinfo->out_color_space = JCS_UNKNOWN;
-  }
-
-  /* Set defaults for other decompression parameters. */
-  cinfo->scale_num = cinfo->block_size;		/* 1:1 scaling */
-  cinfo->scale_denom = cinfo->block_size;
-  cinfo->output_gamma = 1.0;
-  cinfo->buffered_image = FALSE;
-  cinfo->raw_data_out = FALSE;
-  cinfo->dct_method = JDCT_DEFAULT;
-  cinfo->do_fancy_upsampling = TRUE;
-  cinfo->do_block_smoothing = TRUE;
-  cinfo->quantize_colors = FALSE;
-  /* We set these in case application only sets quantize_colors. */
-  cinfo->dither_mode = JDITHER_FS;
-#ifdef QUANT_2PASS_SUPPORTED
-  cinfo->two_pass_quantize = TRUE;
-#else
-  cinfo->two_pass_quantize = FALSE;
-#endif
-  cinfo->desired_number_of_colors = 256;
-  cinfo->colormap = NULL;
-  /* Initialize for no mode change in buffered-image mode. */
-  cinfo->enable_1pass_quant = FALSE;
-  cinfo->enable_external_quant = FALSE;
-  cinfo->enable_2pass_quant = FALSE;
-}
-
-
-/*
- * Decompression startup: read start of JPEG datastream to see what's there.
- * Need only initialize JPEG object and supply a data source before calling.
- *
- * This routine will read as far as the first SOS marker (ie, actual start of
- * compressed data), and will save all tables and parameters in the JPEG
- * object.  It will also initialize the decompression parameters to default
- * values, and finally return JPEG_HEADER_OK.  On return, the application may
- * adjust the decompression parameters and then call jpeg_start_decompress.
- * (Or, if the application only wanted to determine the image parameters,
- * the data need not be decompressed.  In that case, call jpeg_abort or
- * jpeg_destroy to release any temporary space.)
- * If an abbreviated (tables only) datastream is presented, the routine will
- * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
- * re-use the JPEG object to read the abbreviated image datastream(s).
- * It is unnecessary (but OK) to call jpeg_abort in this case.
- * The JPEG_SUSPENDED return code only occurs if the data source module
- * requests suspension of the decompressor.  In this case the application
- * should load more source data and then re-call jpeg_read_header to resume
- * processing.
- * If a non-suspending data source is used and require_image is TRUE, then the
- * return code need not be inspected since only JPEG_HEADER_OK is possible.
- *
- * This routine is now just a front end to jpeg_consume_input, with some
- * extra error checking.
- */
-
-GLOBAL(int)
-jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
-{
-  int retcode;
-
-  if (cinfo->global_state != DSTATE_START &&
-      cinfo->global_state != DSTATE_INHEADER)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  retcode = jpeg_consume_input(cinfo);
-
-  switch (retcode) {
-  case JPEG_REACHED_SOS:
-    retcode = JPEG_HEADER_OK;
-    break;
-  case JPEG_REACHED_EOI:
-    if (require_image)		/* Complain if application wanted an image */
-      ERREXIT(cinfo, JERR_NO_IMAGE);
-    /* Reset to start state; it would be safer to require the application to
-     * call jpeg_abort, but we can't change it now for compatibility reasons.
-     * A side effect is to free any temporary memory (there shouldn't be any).
-     */
-    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
-    retcode = JPEG_HEADER_TABLES_ONLY;
-    break;
-  case JPEG_SUSPENDED:
-    /* no work */
-    break;
-  }
-
-  return retcode;
-}
-
-
-/*
- * Consume data in advance of what the decompressor requires.
- * This can be called at any time once the decompressor object has
- * been created and a data source has been set up.
- *
- * This routine is essentially a state machine that handles a couple
- * of critical state-transition actions, namely initial setup and
- * transition from header scanning to ready-for-start_decompress.
- * All the actual input is done via the input controller's consume_input
- * method.
- */
-
-GLOBAL(int)
-jpeg_consume_input (j_decompress_ptr cinfo)
-{
-  int retcode = JPEG_SUSPENDED;
-
-  /* NB: every possible DSTATE value should be listed in this switch */
-  switch (cinfo->global_state) {
-  case DSTATE_START:
-    /* Start-of-datastream actions: reset appropriate modules */
-    (*cinfo->inputctl->reset_input_controller) (cinfo);
-    /* Initialize application's data source module */
-    (*cinfo->src->init_source) (cinfo);
-    cinfo->global_state = DSTATE_INHEADER;
-    /*FALLTHROUGH*/
-  case DSTATE_INHEADER:
-    retcode = (*cinfo->inputctl->consume_input) (cinfo);
-    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
-      /* Set up default parameters based on header data */
-      default_decompress_parms(cinfo);
-      /* Set global state: ready for start_decompress */
-      cinfo->global_state = DSTATE_READY;
-    }
-    break;
-  case DSTATE_READY:
-    /* Can't advance past first SOS until start_decompress is called */
-    retcode = JPEG_REACHED_SOS;
-    break;
-  case DSTATE_PRELOAD:
-  case DSTATE_PRESCAN:
-  case DSTATE_SCANNING:
-  case DSTATE_RAW_OK:
-  case DSTATE_BUFIMAGE:
-  case DSTATE_BUFPOST:
-  case DSTATE_STOPPING:
-    retcode = (*cinfo->inputctl->consume_input) (cinfo);
-    break;
-  default:
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  return retcode;
-}
-
-
-/*
- * Have we finished reading the input file?
- */
-
-GLOBAL(boolean)
-jpeg_input_complete (j_decompress_ptr cinfo)
-{
-  /* Check for valid jpeg object */
-  if (cinfo->global_state < DSTATE_START ||
-      cinfo->global_state > DSTATE_STOPPING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return cinfo->inputctl->eoi_reached;
-}
-
-
-/*
- * Is there more than one scan?
- */
-
-GLOBAL(boolean)
-jpeg_has_multiple_scans (j_decompress_ptr cinfo)
-{
-  /* Only valid after jpeg_read_header completes */
-  if (cinfo->global_state < DSTATE_READY ||
-      cinfo->global_state > DSTATE_STOPPING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return cinfo->inputctl->has_multiple_scans;
-}
-
-
-/*
- * Finish JPEG decompression.
- *
- * This will normally just verify the file trailer and release temp storage.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_finish_decompress (j_decompress_ptr cinfo)
-{
-  if ((cinfo->global_state == DSTATE_SCANNING ||
-       cinfo->global_state == DSTATE_RAW_OK) && ! cinfo->buffered_image) {
-    /* Terminate final pass of non-buffered mode */
-    if (cinfo->output_scanline < cinfo->output_height)
-      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
-    (*cinfo->master->finish_output_pass) (cinfo);
-    cinfo->global_state = DSTATE_STOPPING;
-  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
-    /* Finishing after a buffered-image operation */
-    cinfo->global_state = DSTATE_STOPPING;
-  } else if (cinfo->global_state != DSTATE_STOPPING) {
-    /* STOPPING = repeat call after a suspension, anything else is error */
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  /* Read until EOI */
-  while (! cinfo->inputctl->eoi_reached) {
-    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
-  }
-  /* Do final cleanup */
-  (*cinfo->src->term_source) (cinfo);
-  /* We can use jpeg_abort to release memory and reset global_state */
-  jpeg_abort((j_common_ptr) cinfo);
-  return TRUE;
-}
diff --git a/cmake/externals/libjpeg/jdapistd.c b/cmake/externals/libjpeg/jdapistd.c
deleted file mode 100644
index 7f3a78b25..000000000
--- a/cmake/externals/libjpeg/jdapistd.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * jdapistd.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2013 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains application interface code for the decompression half
- * of the JPEG library.  These are the "standard" API routines that are
- * used in the normal full-decompression case.  They are not used by a
- * transcoding-only application.  Note that if an application links in
- * jpeg_start_decompress, it will end up linking in the entire decompressor.
- * We thus must separate this file from jdapimin.c to avoid linking the
- * whole decompression library into a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Decompression initialization.
- * jpeg_read_header must be completed before calling this.
- *
- * If a multipass operating mode was selected, this will do all but the
- * last pass, and thus may take a great deal of time.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_start_decompress (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state == DSTATE_READY) {
-    /* First call: initialize master control, select active modules */
-    jinit_master_decompress(cinfo);
-    if (cinfo->buffered_image) {
-      /* No more work here; expecting jpeg_start_output next */
-      cinfo->global_state = DSTATE_BUFIMAGE;
-      return TRUE;
-    }
-    cinfo->global_state = DSTATE_PRELOAD;
-  }
-  if (cinfo->global_state == DSTATE_PRELOAD) {
-    /* If file has multiple scans, absorb them all into the coef buffer */
-    if (cinfo->inputctl->has_multiple_scans) {
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-      for (;;) {
-	int retcode;
-	/* Call progress monitor hook if present */
-	if (cinfo->progress != NULL)
-	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-	/* Absorb some more input */
-	retcode = (*cinfo->inputctl->consume_input) (cinfo);
-	if (retcode == JPEG_SUSPENDED)
-	  return FALSE;
-	if (retcode == JPEG_REACHED_EOI)
-	  break;
-	/* Advance progress counter if appropriate */
-	if (cinfo->progress != NULL &&
-	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	    /* jdmaster underestimated number of scans; ratchet up one scan */
-	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	  }
-	}
-      }
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-    }
-    cinfo->output_scan_number = cinfo->input_scan_number;
-  } else if (cinfo->global_state != DSTATE_PRESCAN)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Perform any dummy output passes, and set up for the final pass */
-  return output_pass_setup(cinfo);
-}
-
-
-/*
- * Set up for an output pass, and perform any dummy pass(es) needed.
- * Common subroutine for jpeg_start_decompress and jpeg_start_output.
- * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
- * Exit: If done, returns TRUE and sets global_state for proper output mode.
- *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
- */
-
-LOCAL(boolean)
-output_pass_setup (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state != DSTATE_PRESCAN) {
-    /* First call: do pass setup */
-    (*cinfo->master->prepare_for_output_pass) (cinfo);
-    cinfo->output_scanline = 0;
-    cinfo->global_state = DSTATE_PRESCAN;
-  }
-  /* Loop over any required dummy passes */
-  while (cinfo->master->is_dummy_pass) {
-#ifdef QUANT_2PASS_SUPPORTED
-    /* Crank through the dummy pass */
-    while (cinfo->output_scanline < cinfo->output_height) {
-      JDIMENSION last_scanline;
-      /* Call progress monitor hook if present */
-      if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-	cinfo->progress->pass_limit = (long) cinfo->output_height;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      }
-      /* Process some data */
-      last_scanline = cinfo->output_scanline;
-      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
-				    &cinfo->output_scanline, (JDIMENSION) 0);
-      if (cinfo->output_scanline == last_scanline)
-	return FALSE;		/* No progress made, must suspend */
-    }
-    /* Finish up dummy pass, and set up for another one */
-    (*cinfo->master->finish_output_pass) (cinfo);
-    (*cinfo->master->prepare_for_output_pass) (cinfo);
-    cinfo->output_scanline = 0;
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* QUANT_2PASS_SUPPORTED */
-  }
-  /* Ready for application to drive output pass through
-   * jpeg_read_scanlines or jpeg_read_raw_data.
-   */
-  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
-  return TRUE;
-}
-
-
-/*
- * Read some scanlines of data from the JPEG decompressor.
- *
- * The return value will be the number of lines actually read.
- * This may be less than the number requested in several cases,
- * including bottom of image, data source suspension, and operating
- * modes that emit multiple scanlines at a time.
- *
- * Note: we warn about excess calls to jpeg_read_scanlines() since
- * this likely signals an application programmer error.  However,
- * an oversize buffer (max_lines > scanlines remaining) is not an error.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
-		     JDIMENSION max_lines)
-{
-  JDIMENSION row_ctr;
-
-  if (cinfo->global_state != DSTATE_SCANNING)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->output_scanline >= cinfo->output_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->output_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Process some data */
-  row_ctr = 0;
-  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
-  cinfo->output_scanline += row_ctr;
-  return row_ctr;
-}
-
-
-/*
- * Alternate entry point to read raw data.
- * Processes exactly one iMCU row per call, unless suspended.
- */
-
-GLOBAL(JDIMENSION)
-jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
-		    JDIMENSION max_lines)
-{
-  JDIMENSION lines_per_iMCU_row;
-
-  if (cinfo->global_state != DSTATE_RAW_OK)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  if (cinfo->output_scanline >= cinfo->output_height) {
-    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
-    return 0;
-  }
-
-  /* Call progress monitor hook if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-    cinfo->progress->pass_limit = (long) cinfo->output_height;
-    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-  }
-
-  /* Verify that at least one iMCU row can be returned. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
-  if (max_lines < lines_per_iMCU_row)
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* Decompress directly into user's buffer. */
-  if (! (*cinfo->coef->decompress_data) (cinfo, data))
-    return 0;			/* suspension forced, can do nothing more */
-
-  /* OK, we processed one iMCU row. */
-  cinfo->output_scanline += lines_per_iMCU_row;
-  return lines_per_iMCU_row;
-}
-
-
-/* Additional entry points for buffered-image mode. */
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Initialize for an output pass in buffered-image mode.
- */
-
-GLOBAL(boolean)
-jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
-{
-  if (cinfo->global_state != DSTATE_BUFIMAGE &&
-      cinfo->global_state != DSTATE_PRESCAN)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  /* Limit scan number to valid range */
-  if (scan_number <= 0)
-    scan_number = 1;
-  if (cinfo->inputctl->eoi_reached &&
-      scan_number > cinfo->input_scan_number)
-    scan_number = cinfo->input_scan_number;
-  cinfo->output_scan_number = scan_number;
-  /* Perform any dummy output passes, and set up for the real pass */
-  return output_pass_setup(cinfo);
-}
-
-
-/*
- * Finish up after an output pass in buffered-image mode.
- *
- * Returns FALSE if suspended.  The return value need be inspected only if
- * a suspending data source is used.
- */
-
-GLOBAL(boolean)
-jpeg_finish_output (j_decompress_ptr cinfo)
-{
-  if ((cinfo->global_state == DSTATE_SCANNING ||
-       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
-    /* Terminate this pass. */
-    /* We do not require the whole pass to have been completed. */
-    (*cinfo->master->finish_output_pass) (cinfo);
-    cinfo->global_state = DSTATE_BUFPOST;
-  } else if (cinfo->global_state != DSTATE_BUFPOST) {
-    /* BUFPOST = repeat call after a suspension, anything else is error */
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  }
-  /* Read markers looking for SOS or EOI */
-  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
-    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
-  }
-  cinfo->global_state = DSTATE_BUFIMAGE;
-  return TRUE;
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jdarith.c b/cmake/externals/libjpeg/jdarith.c
deleted file mode 100644
index 2c9abe23f..000000000
--- a/cmake/externals/libjpeg/jdarith.c
+++ /dev/null
@@ -1,796 +0,0 @@
-/*
- * jdarith.c
- *
- * Developed 1997-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains portable arithmetic entropy decoding routines for JPEG
- * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
- *
- * Both sequential and progressive modes are supported in this single module.
- *
- * Suspension is not currently supported in this module.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Expanded entropy decoder object for arithmetic decoding. */
-
-typedef struct {
-  struct jpeg_entropy_decoder pub; /* public fields */
-
-  INT32 c;       /* C register, base of coding interval + input bit buffer */
-  INT32 a;               /* A register, normalized size of coding interval */
-  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
-                                                         /* init: ct = -16 */
-                                                         /* run: ct = 0..7 */
-                                                         /* error: ct = -1 */
-  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
-  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
-
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-
-  /* Pointers to statistics areas (these workspaces have image lifespan) */
-  unsigned char * dc_stats[NUM_ARITH_TBLS];
-  unsigned char * ac_stats[NUM_ARITH_TBLS];
-
-  /* Statistics bin for coding with fixed probability 0.5 */
-  unsigned char fixed_bin[4];
-} arith_entropy_decoder;
-
-typedef arith_entropy_decoder * arith_entropy_ptr;
-
-/* The following two definitions specify the allocation chunk size
- * for the statistics area.
- * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
- * 49 statistics bins for DC, and 245 statistics bins for AC coding.
- *
- * We use a compact representation with 1 byte per statistics bin,
- * thus the numbers directly represent byte sizes.
- * This 1 byte per statistics bin contains the meaning of the MPS
- * (more probable symbol) in the highest bit (mask 0x80), and the
- * index into the probability estimation state machine table
- * in the lower bits (mask 0x7F).
- */
-
-#define DC_STAT_BINS 64
-#define AC_STAT_BINS 256
-
-
-LOCAL(int)
-get_byte (j_decompress_ptr cinfo)
-/* Read next input byte; we do not support suspension in this module. */
-{
-  struct jpeg_source_mgr * src = cinfo->src;
-
-  if (src->bytes_in_buffer == 0)
-    if (! (*src->fill_input_buffer) (cinfo))
-      ERREXIT(cinfo, JERR_CANT_SUSPEND);
-  src->bytes_in_buffer--;
-  return GETJOCTET(*src->next_input_byte++);
-}
-
-
-/*
- * The core arithmetic decoding routine (common in JPEG and JBIG).
- * This needs to go as fast as possible.
- * Machine-dependent optimization facilities
- * are not utilized in this portable implementation.
- * However, this code should be fairly efficient and
- * may be a good base for further optimizations anyway.
- *
- * Return value is 0 or 1 (binary decision).
- *
- * Note: I've changed the handling of the code base & bit
- * buffer register C compared to other implementations
- * based on the standards layout & procedures.
- * While it also contains both the actual base of the
- * coding interval (16 bits) and the next-bits buffer,
- * the cut-point between these two parts is floating
- * (instead of fixed) with the bit shift counter CT.
- * Thus, we also need only one (variable instead of
- * fixed size) shift for the LPS/MPS decision, and
- * we can do away with any renormalization update
- * of C (except for new data insertion, of course).
- *
- * I've also introduced a new scheme for accessing
- * the probability estimation state machine table,
- * derived from Markus Kuhn's JBIG implementation.
- */
-
-LOCAL(int)
-arith_decode (j_decompress_ptr cinfo, unsigned char *st)
-{
-  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  register unsigned char nl, nm;
-  register INT32 qe, temp;
-  register int sv, data;
-
-  /* Renormalization & data input per section D.2.6 */
-  while (e->a < 0x8000L) {
-    if (--e->ct < 0) {
-      /* Need to fetch next data byte */
-      if (cinfo->unread_marker)
-	data = 0;		/* stuff zero data */
-      else {
-	data = get_byte(cinfo);	/* read next input byte */
-	if (data == 0xFF) {	/* zero stuff or marker code */
-	  do data = get_byte(cinfo);
-	  while (data == 0xFF);	/* swallow extra 0xFF bytes */
-	  if (data == 0)
-	    data = 0xFF;	/* discard stuffed zero byte */
-	  else {
-	    /* Note: Different from the Huffman decoder, hitting
-	     * a marker while processing the compressed data
-	     * segment is legal in arithmetic coding.
-	     * The convention is to supply zero data
-	     * then until decoding is complete.
-	     */
-	    cinfo->unread_marker = data;
-	    data = 0;
-	  }
-	}
-      }
-      e->c = (e->c << 8) | data; /* insert data into C register */
-      if ((e->ct += 8) < 0)	 /* update bit shift counter */
-	/* Need more initial bytes */
-	if (++e->ct == 0)
-	  /* Got 2 initial bytes -> re-init A and exit loop */
-	  e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
-    }
-    e->a <<= 1;
-  }
-
-  /* Fetch values from our compact representation of Table D.3(D.2):
-   * Qe values and probability estimation state machine
-   */
-  sv = *st;
-  qe = jpeg_aritab[sv & 0x7F];	/* => Qe_Value */
-  nl = qe & 0xFF; qe >>= 8;	/* Next_Index_LPS + Switch_MPS */
-  nm = qe & 0xFF; qe >>= 8;	/* Next_Index_MPS */
-
-  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
-  temp = e->a - qe;
-  e->a = temp;
-  temp <<= e->ct;
-  if (e->c >= temp) {
-    e->c -= temp;
-    /* Conditional LPS (less probable symbol) exchange */
-    if (e->a < qe) {
-      e->a = qe;
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-    } else {
-      e->a = qe;
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
-    }
-  } else if (e->a < 0x8000L) {
-    /* Conditional MPS (more probable symbol) exchange */
-    if (e->a < qe) {
-      *st = (sv & 0x80) ^ nl;	/* Estimate_after_LPS */
-      sv ^= 0x80;		/* Exchange LPS/MPS */
-    } else {
-      *st = (sv & 0x80) ^ nm;	/* Estimate_after_MPS */
-    }
-  }
-
-  return sv >> 7;
-}
-
-
-/*
- * Check for a restart marker & resynchronize decoder.
- */
-
-LOCAL(void)
-process_restart (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci;
-  jpeg_component_info * compptr;
-
-  /* Advance past the RSTn marker */
-  if (! (*cinfo->marker->read_restart_marker) (cinfo))
-    ERREXIT(cinfo, JERR_CANT_SUSPEND);
-
-  /* Re-initialize statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
-      /* Reset DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
-	(cinfo->progressive_mode && cinfo->Ss)) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
-    }
-  }
-
-  /* Reset arithmetic decoding variables */
-  entropy->c = 0;
-  entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
-
-  /* Reset restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Arithmetic MCU decoding.
- * Each of these routines decodes and returns one MCU's worth of
- * arithmetic-compressed coefficients.
- * The coefficients are reordered from zigzag order into natural array order,
- * but are not dequantized.
- *
- * The i'th block of the MCU is stored into the block pointed to by
- * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
- */
-
-/*
- * MCU decoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl, sign;
-  int v, m;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
-
-    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.19: Decode_DC_DIFF */
-    if (arith_decode(cinfo, st) == 0)
-      entropy->dc_context[ci] = 0;
-    else {
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, st + 1);
-      st += 2; st += sign;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == (int) 0x8000U) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
-      else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      entropy->last_dc_val[ci] += v;
-    }
-
-    /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */
-    (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  unsigned char *st;
-  int tbl, sign, k;
-  int v, m;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* There is always only one block per MCU */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
-
-  /* Figure F.20: Decode_AC_coefficients */
-  k = cinfo->Ss - 1;
-  do {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    if (arith_decode(cinfo, st)) break;		/* EOB flag */
-    for (;;) {
-      k++;
-      if (arith_decode(cinfo, st + 1)) break;
-      st += 3;
-      if (k >= cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
-      }
-    }
-    /* Figure F.21: Decoding nonzero value v */
-    /* Figure F.22: Decoding the sign of v */
-    sign = arith_decode(cinfo, entropy->fixed_bin);
-    st += 2;
-    /* Figure F.23: Decoding the magnitude category of v */
-    if ((m = arith_decode(cinfo, st)) != 0) {
-      if (arith_decode(cinfo, st)) {
-	m <<= 1;
-	st = entropy->ac_stats[tbl] +
-	     (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == (int) 0x8000U) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-    }
-    v = m;
-    /* Figure F.24: Decoding the magnitude bit pattern of v */
-    st += 14;
-    while (m >>= 1)
-      if (arith_decode(cinfo, st)) v |= m;
-    v += 1; if (sign) v = -v;
-    /* Scale and output coefficient in natural (dezigzagged) order */
-    (*block)[natural_order[k]] = (JCOEF) (v << cinfo->Al);
-  } while (k < cinfo->Se);
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  unsigned char *st;
-  JCOEF p1;
-  int blkn;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  st = entropy->fixed_bin;	/* use fixed probability estimation */
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* Encoded data is simply the next bit of the two's-complement DC value */
-    if (arith_decode(cinfo, st))
-      MCU_data[blkn][0][0] |= p1;
-  }
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
-  JCOEFPTR thiscoef;
-  unsigned char *st;
-  int tbl, k, kex;
-  JCOEF p1, m1;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* There is always only one block per MCU */
-  block = MCU_data[0];
-  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
-
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-  m1 = -p1;			/* -1 in the bit position being coded */
-
-  /* Establish EOBx (previous stage end-of-block) index */
-  kex = cinfo->Se;
-  do {
-    if ((*block)[natural_order[kex]]) break;
-  } while (--kex);
-
-  k = cinfo->Ss - 1;
-  do {
-    st = entropy->ac_stats[tbl] + 3 * k;
-    if (k >= kex)
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
-    for (;;) {
-      thiscoef = *block + natural_order[++k];
-      if (*thiscoef) {				/* previously nonzero coef */
-	if (arith_decode(cinfo, st + 2)) {
-	  if (*thiscoef < 0)
-	    *thiscoef += m1;
-	  else
-	    *thiscoef += p1;
-	}
-	break;
-      }
-      if (arith_decode(cinfo, st + 1)) {	/* newly nonzero coef */
-	if (arith_decode(cinfo, entropy->fixed_bin))
-	  *thiscoef = m1;
-	else
-	  *thiscoef = p1;
-	break;
-      }
-      st += 3;
-      if (k >= cinfo->Se) {
-	WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	entropy->ct = -1;			/* spectral overflow */
-	return TRUE;
-      }
-    }
-  } while (k < cinfo->Se);
-
-  return TRUE;
-}
-
-
-/*
- * Decode one MCU's worth of arithmetic-compressed coefficients.
- */
-
-METHODDEF(boolean)
-decode_mcu (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  jpeg_component_info * compptr;
-  JBLOCKROW block;
-  unsigned char *st;
-  int blkn, ci, tbl, sign, k;
-  int v, m;
-  const int * natural_order;
-
-  /* Process restart marker if needed */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      process_restart(cinfo);
-    entropy->restarts_to_go--;
-  }
-
-  if (entropy->ct == -1) return TRUE;	/* if error do nothing */
-
-  natural_order = cinfo->natural_order;
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-    ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
-
-    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
-
-    tbl = compptr->dc_tbl_no;
-
-    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
-    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
-
-    /* Figure F.19: Decode_DC_DIFF */
-    if (arith_decode(cinfo, st) == 0)
-      entropy->dc_context[ci] = 0;
-    else {
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, st + 1);
-      st += 2; st += sign;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	st = entropy->dc_stats[tbl] + 20;	/* Table F.4: X1 = 20 */
-	while (arith_decode(cinfo, st)) {
-	  if ((m <<= 1) == (int) 0x8000U) {
-	    WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	    entropy->ct = -1;			/* magnitude overflow */
-	    return TRUE;
-	  }
-	  st += 1;
-	}
-      }
-      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
-      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
-	entropy->dc_context[ci] = 0;		   /* zero diff category */
-      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
-	entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
-      else
-	entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      entropy->last_dc_val[ci] += v;
-    }
-
-    (*block)[0] = (JCOEF) entropy->last_dc_val[ci];
-
-    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
-
-    if (cinfo->lim_Se == 0) continue;
-    tbl = compptr->ac_tbl_no;
-    k = 0;
-
-    /* Figure F.20: Decode_AC_coefficients */
-    do {
-      st = entropy->ac_stats[tbl] + 3 * k;
-      if (arith_decode(cinfo, st)) break;	/* EOB flag */
-      for (;;) {
-	k++;
-	if (arith_decode(cinfo, st + 1)) break;
-	st += 3;
-	if (k >= cinfo->lim_Se) {
-	  WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	  entropy->ct = -1;			/* spectral overflow */
-	  return TRUE;
-	}
-      }
-      /* Figure F.21: Decoding nonzero value v */
-      /* Figure F.22: Decoding the sign of v */
-      sign = arith_decode(cinfo, entropy->fixed_bin);
-      st += 2;
-      /* Figure F.23: Decoding the magnitude category of v */
-      if ((m = arith_decode(cinfo, st)) != 0) {
-	if (arith_decode(cinfo, st)) {
-	  m <<= 1;
-	  st = entropy->ac_stats[tbl] +
-	       (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
-	  while (arith_decode(cinfo, st)) {
-	    if ((m <<= 1) == (int) 0x8000U) {
-	      WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
-	      entropy->ct = -1;			/* magnitude overflow */
-	      return TRUE;
-	    }
-	    st += 1;
-	  }
-	}
-      }
-      v = m;
-      /* Figure F.24: Decoding the magnitude bit pattern of v */
-      st += 14;
-      while (m >>= 1)
-	if (arith_decode(cinfo, st)) v |= m;
-      v += 1; if (sign) v = -v;
-      (*block)[natural_order[k]] = (JCOEF) v;
-    } while (k < cinfo->lim_Se);
-  }
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-start_pass (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  int ci, tbl;
-  jpeg_component_info * compptr;
-
-  if (cinfo->progressive_mode) {
-    /* Validate progressive scan parameters */
-    if (cinfo->Ss == 0) {
-      if (cinfo->Se != 0)
-	goto bad;
-    } else {
-      /* need not check Ss/Se < 0 since they came from unsigned bytes */
-      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
-	goto bad;
-      /* AC scans may have only one component */
-      if (cinfo->comps_in_scan != 1)
-	goto bad;
-    }
-    if (cinfo->Ah != 0) {
-      /* Successive approximation refinement scan: must have Al = Ah-1. */
-      if (cinfo->Ah-1 != cinfo->Al)
-	goto bad;
-    }
-    if (cinfo->Al > 13) {	/* need not check for < 0 */
-      bad:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-    /* Update progression status, and verify that scan order is legal.
-     * Note that inter-scan inconsistencies are treated as warnings
-     * not fatal errors ... not clear if this is right way to behave.
-     */
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
-      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
-      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
-      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
-	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
-	if (cinfo->Ah != expected)
-	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
-	coef_bit_ptr[coefi] = cinfo->Al;
-      }
-    }
-    /* Select MCU decoding routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_first;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_refine;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_refine;
-    }
-  } else {
-    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
-     * This ought to be an error condition, but we make it a warning.
-     */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-	(cinfo->Se < DCTSIZE2 && cinfo->Se != cinfo->lim_Se))
-      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
-    /* Select MCU decoding routine */
-    entropy->pub.decode_mcu = decode_mcu;
-  }
-
-  /* Allocate & initialize requested statistics areas */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      tbl = compptr->dc_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->dc_stats[tbl] == NULL)
-	entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
-      /* Initialize DC predictions to 0 */
-      entropy->last_dc_val[ci] = 0;
-      entropy->dc_context[ci] = 0;
-    }
-    if ((! cinfo->progressive_mode && cinfo->lim_Se) ||
-	(cinfo->progressive_mode && cinfo->Ss)) {
-      tbl = compptr->ac_tbl_no;
-      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
-	ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
-      if (entropy->ac_stats[tbl] == NULL)
-	entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
-    }
-  }
-
-  /* Initialize arithmetic decoding variables */
-  entropy->c = 0;
-  entropy->a = 0;
-  entropy->ct = -16;	/* force reading 2 initial bytes to fill C */
-
-  /* Initialize restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Finish up at the end of an arithmetic-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Module initialization routine for arithmetic entropy decoding.
- */
-
-GLOBAL(void)
-jinit_arith_decoder (j_decompress_ptr cinfo)
-{
-  arith_entropy_ptr entropy;
-  int i;
-
-  entropy = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(arith_entropy_decoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass;
-  entropy->pub.finish_pass = finish_pass;
-
-  /* Mark tables unallocated */
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    entropy->dc_stats[i] = NULL;
-    entropy->ac_stats[i] = NULL;
-  }
-
-  /* Initialize index for fixed probability estimation */
-  entropy->fixed_bin[0] = 113;
-
-  if (cinfo->progressive_mode) {
-    /* Create progression status table */
-    int *coef_bit_ptr, ci;
-    cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       cinfo->num_components * DCTSIZE2 * SIZEOF(int));
-    coef_bit_ptr = & cinfo->coef_bits[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
-      for (i = 0; i < DCTSIZE2; i++)
-	*coef_bit_ptr++ = -1;
-  }
-}
diff --git a/cmake/externals/libjpeg/jdatadst.c b/cmake/externals/libjpeg/jdatadst.c
deleted file mode 100644
index b3b4798ea..000000000
--- a/cmake/externals/libjpeg/jdatadst.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * jdatadst.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains compression data destination routines for the case of
- * emitting JPEG data to memory or to a file (or any stdio stream).
- * While these routines are sufficient for most applications,
- * some will want to use a different destination manager.
- * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
- * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
- * than 8 bits on your machine, you may need to do some tweaking.
- */
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-
-/* Expanded data destination object for stdio output */
-
-#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
-
-typedef struct {
-  struct jpeg_destination_mgr pub; /* public fields */
-
-  FILE * outfile;		/* target stream */
-  JOCTET buffer[OUTPUT_BUF_SIZE]; /* output buffer */
-} my_destination_mgr;
-
-typedef my_destination_mgr * my_dest_ptr;
-
-
-/* Expanded data destination object for memory output */
-
-typedef struct {
-  struct jpeg_destination_mgr pub; /* public fields */
-
-  unsigned char ** outbuffer;	/* target buffer */
-  size_t * outsize;
-  unsigned char * newbuffer;	/* newly allocated buffer */
-  JOCTET * buffer;		/* start of buffer */
-  size_t bufsize;
-} my_mem_destination_mgr;
-
-typedef my_mem_destination_mgr * my_mem_dest_ptr;
-
-
-/*
- * Initialize destination --- called by jpeg_start_compress
- * before any data is actually written.
- */
-
-METHODDEF(void)
-init_destination (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-
-  dest->pub.next_output_byte = dest->buffer;
-  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
-}
-
-METHODDEF(void)
-init_mem_destination (j_compress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Empty the output buffer --- called whenever buffer fills up.
- *
- * In typical applications, this should write the entire output buffer
- * (ignoring the current state of next_output_byte & free_in_buffer),
- * reset the pointer & count to the start of the buffer, and return TRUE
- * indicating that the buffer has been dumped.
- *
- * In applications that need to be able to suspend compression due to output
- * overrun, a FALSE return indicates that the buffer cannot be emptied now.
- * In this situation, the compressor will return to its caller (possibly with
- * an indication that it has not accepted all the supplied scanlines).  The
- * application should resume compression after it has made more room in the
- * output buffer.  Note that there are substantial restrictions on the use of
- * suspension --- see the documentation.
- *
- * When suspending, the compressor will back up to a convenient restart point
- * (typically the start of the current MCU). next_output_byte & free_in_buffer
- * indicate where the restart point will be if the current call returns FALSE.
- * Data beyond this point will be regenerated after resumption, so do not
- * write it out when emptying the buffer externally.
- */
-
-METHODDEF(boolean)
-empty_output_buffer (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-
-  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
-      (size_t) OUTPUT_BUF_SIZE)
-    ERREXIT(cinfo, JERR_FILE_WRITE);
-
-  dest->pub.next_output_byte = dest->buffer;
-  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
-
-  return TRUE;
-}
-
-METHODDEF(boolean)
-empty_mem_output_buffer (j_compress_ptr cinfo)
-{
-  size_t nextsize;
-  JOCTET * nextbuffer;
-  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
-
-  /* Try to allocate new buffer with double size */
-  nextsize = dest->bufsize * 2;
-  nextbuffer = (JOCTET *) malloc(nextsize);
-
-  if (nextbuffer == NULL)
-    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 11);
-
-  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
-
-  if (dest->newbuffer != NULL)
-    free(dest->newbuffer);
-
-  dest->newbuffer = nextbuffer;
-
-  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
-  dest->pub.free_in_buffer = dest->bufsize;
-
-  dest->buffer = nextbuffer;
-  dest->bufsize = nextsize;
-
-  return TRUE;
-}
-
-
-/*
- * Terminate destination --- called by jpeg_finish_compress
- * after all data has been written.  Usually needs to flush buffer.
- *
- * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
- * application must deal with any cleanup that should happen even
- * for error exit.
- */
-
-METHODDEF(void)
-term_destination (j_compress_ptr cinfo)
-{
-  my_dest_ptr dest = (my_dest_ptr) cinfo->dest;
-  size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
-
-  /* Write any data remaining in the buffer */
-  if (datacount > 0) {
-    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
-      ERREXIT(cinfo, JERR_FILE_WRITE);
-  }
-  JFFLUSH(dest->outfile);
-  /* Make sure we wrote the output file OK */
-  if (JFERROR(dest->outfile))
-    ERREXIT(cinfo, JERR_FILE_WRITE);
-}
-
-METHODDEF(void)
-term_mem_destination (j_compress_ptr cinfo)
-{
-  my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest;
-
-  *dest->outbuffer = dest->buffer;
-  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
-}
-
-
-/*
- * Prepare for output to a stdio stream.
- * The caller must have already opened the stream,
- * and is responsible for closing it after finishing compression.
- */
-
-GLOBAL(void)
-jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
-{
-  my_dest_ptr dest;
-
-  /* The destination object is made permanent so that multiple JPEG images
-   * can be written to the same file without re-executing jpeg_stdio_dest.
-   * This makes it dangerous to use this manager and a different destination
-   * manager serially with the same JPEG object, because their private object
-   * sizes may be different.  Caveat programmer.
-   */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
-    cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_destination_mgr));
-  }
-
-  dest = (my_dest_ptr) cinfo->dest;
-  dest->pub.init_destination = init_destination;
-  dest->pub.empty_output_buffer = empty_output_buffer;
-  dest->pub.term_destination = term_destination;
-  dest->outfile = outfile;
-}
-
-
-/*
- * Prepare for output to a memory buffer.
- * The caller may supply an own initial buffer with appropriate size.
- * Otherwise, or when the actual data output exceeds the given size,
- * the library adapts the buffer size as necessary.
- * The standard library functions malloc/free are used for allocating
- * larger memory, so the buffer is available to the application after
- * finishing compression, and then the application is responsible for
- * freeing the requested memory.
- * Note:  An initial buffer supplied by the caller is expected to be
- * managed by the application.  The library does not free such buffer
- * when allocating a larger buffer.
- */
-
-GLOBAL(void)
-jpeg_mem_dest (j_compress_ptr cinfo,
-	       unsigned char ** outbuffer, size_t * outsize)
-{
-  my_mem_dest_ptr dest;
-
-  if (outbuffer == NULL || outsize == NULL)	/* sanity check */
-    ERREXIT(cinfo, JERR_BUFFER_SIZE);
-
-  /* The destination object is made permanent so that multiple JPEG images
-   * can be written to the same buffer without re-executing jpeg_mem_dest.
-   */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
-    cinfo->dest = (struct jpeg_destination_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_mem_destination_mgr));
-  }
-
-  dest = (my_mem_dest_ptr) cinfo->dest;
-  dest->pub.init_destination = init_mem_destination;
-  dest->pub.empty_output_buffer = empty_mem_output_buffer;
-  dest->pub.term_destination = term_mem_destination;
-  dest->outbuffer = outbuffer;
-  dest->outsize = outsize;
-  dest->newbuffer = NULL;
-
-  if (*outbuffer == NULL || *outsize == 0) {
-    /* Allocate initial buffer */
-    dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
-    if (dest->newbuffer == NULL)
-      ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
-    *outsize = OUTPUT_BUF_SIZE;
-  }
-
-  dest->pub.next_output_byte = dest->buffer = *outbuffer;
-  dest->pub.free_in_buffer = dest->bufsize = *outsize;
-}
diff --git a/cmake/externals/libjpeg/jdatasrc.c b/cmake/externals/libjpeg/jdatasrc.c
deleted file mode 100644
index fd7a1a594..000000000
--- a/cmake/externals/libjpeg/jdatasrc.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * jdatasrc.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2009-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains decompression data source routines for the case of
- * reading JPEG data from memory or from a file (or any stdio stream).
- * While these routines are sufficient for most applications,
- * some will want to use a different source manager.
- * IMPORTANT: we assume that fread() will correctly transcribe an array of
- * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
- * than 8 bits on your machine, you may need to do some tweaking.
- */
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"
-
-
-/* Expanded data source object for stdio input */
-
-#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
-
-typedef struct {
-  struct jpeg_source_mgr pub;	/* public fields */
-
-  FILE * infile;		/* source stream */
-  JOCTET buffer[INPUT_BUF_SIZE]; /* input buffer */
-  boolean start_of_file;	/* have we gotten any data yet? */
-} my_source_mgr;
-
-typedef my_source_mgr * my_src_ptr;
-
-
-/*
- * Initialize source --- called by jpeg_read_header
- * before any data is actually read.
- */
-
-METHODDEF(void)
-init_source (j_decompress_ptr cinfo)
-{
-  my_src_ptr src = (my_src_ptr) cinfo->src;
-
-  /* We reset the empty-input-file flag for each image,
-   * but we don't clear the input buffer.
-   * This is correct behavior for reading a series of images from one source.
-   */
-  src->start_of_file = TRUE;
-}
-
-METHODDEF(void)
-init_mem_source (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Fill the input buffer --- called whenever buffer is emptied.
- *
- * In typical applications, this should read fresh data into the buffer
- * (ignoring the current state of next_input_byte & bytes_in_buffer),
- * reset the pointer & count to the start of the buffer, and return TRUE
- * indicating that the buffer has been reloaded.  It is not necessary to
- * fill the buffer entirely, only to obtain at least one more byte.
- *
- * There is no such thing as an EOF return.  If the end of the file has been
- * reached, the routine has a choice of ERREXIT() or inserting fake data into
- * the buffer.  In most cases, generating a warning message and inserting a
- * fake EOI marker is the best course of action --- this will allow the
- * decompressor to output however much of the image is there.  However,
- * the resulting error message is misleading if the real problem is an empty
- * input file, so we handle that case specially.
- *
- * In applications that need to be able to suspend compression due to input
- * not being available yet, a FALSE return indicates that no more data can be
- * obtained right now, but more may be forthcoming later.  In this situation,
- * the decompressor will return to its caller (with an indication of the
- * number of scanlines it has read, if any).  The application should resume
- * decompression after it has loaded more data into the input buffer.  Note
- * that there are substantial restrictions on the use of suspension --- see
- * the documentation.
- *
- * When suspending, the decompressor will back up to a convenient restart point
- * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
- * indicate where the restart point will be if the current call returns FALSE.
- * Data beyond this point must be rescanned after resumption, so move it to
- * the front of the buffer rather than discarding it.
- */
-
-METHODDEF(boolean)
-fill_input_buffer (j_decompress_ptr cinfo)
-{
-  my_src_ptr src = (my_src_ptr) cinfo->src;
-  size_t nbytes;
-
-  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
-
-  if (nbytes <= 0) {
-    if (src->start_of_file)	/* Treat empty input file as fatal error */
-      ERREXIT(cinfo, JERR_INPUT_EMPTY);
-    WARNMS(cinfo, JWRN_JPEG_EOF);
-    /* Insert a fake EOI marker */
-    src->buffer[0] = (JOCTET) 0xFF;
-    src->buffer[1] = (JOCTET) JPEG_EOI;
-    nbytes = 2;
-  }
-
-  src->pub.next_input_byte = src->buffer;
-  src->pub.bytes_in_buffer = nbytes;
-  src->start_of_file = FALSE;
-
-  return TRUE;
-}
-
-METHODDEF(boolean)
-fill_mem_input_buffer (j_decompress_ptr cinfo)
-{
-  static const JOCTET mybuffer[4] = {
-    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
-  };
-
-  /* The whole JPEG data is expected to reside in the supplied memory
-   * buffer, so any request for more data beyond the given buffer size
-   * is treated as an error.
-   */
-  WARNMS(cinfo, JWRN_JPEG_EOF);
-
-  /* Insert a fake EOI marker */
-
-  cinfo->src->next_input_byte = mybuffer;
-  cinfo->src->bytes_in_buffer = 2;
-
-  return TRUE;
-}
-
-
-/*
- * Skip data --- used to skip over a potentially large amount of
- * uninteresting data (such as an APPn marker).
- *
- * Writers of suspendable-input applications must note that skip_input_data
- * is not granted the right to give a suspension return.  If the skip extends
- * beyond the data currently in the buffer, the buffer can be marked empty so
- * that the next read will cause a fill_input_buffer call that can suspend.
- * Arranging for additional bytes to be discarded before reloading the input
- * buffer is the application writer's problem.
- */
-
-METHODDEF(void)
-skip_input_data (j_decompress_ptr cinfo, long num_bytes)
-{
-  struct jpeg_source_mgr * src = cinfo->src;
-  size_t nbytes;
-
-  /* Just a dumb implementation for now.  Could use fseek() except
-   * it doesn't work on pipes.  Not clear that being smart is worth
-   * any trouble anyway --- large skips are infrequent.
-   */
-  if (num_bytes > 0) {
-    nbytes = (size_t) num_bytes;
-    while (nbytes > src->bytes_in_buffer) {
-      nbytes -= src->bytes_in_buffer;
-      (void) (*src->fill_input_buffer) (cinfo);
-      /* note we assume that fill_input_buffer will never return FALSE,
-       * so suspension need not be handled.
-       */
-    }
-    src->next_input_byte += nbytes;
-    src->bytes_in_buffer -= nbytes;
-  }
-}
-
-
-/*
- * An additional method that can be provided by data source modules is the
- * resync_to_restart method for error recovery in the presence of RST markers.
- * For the moment, this source module just uses the default resync method
- * provided by the JPEG library.  That method assumes that no backtracking
- * is possible.
- */
-
-
-/*
- * Terminate source --- called by jpeg_finish_decompress
- * after all data has been read.  Often a no-op.
- *
- * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
- * application must deal with any cleanup that should happen even
- * for error exit.
- */
-
-METHODDEF(void)
-term_source (j_decompress_ptr cinfo)
-{
-  /* no work necessary here */
-}
-
-
-/*
- * Prepare for input from a stdio stream.
- * The caller must have already opened the stream,
- * and is responsible for closing it after finishing decompression.
- */
-
-GLOBAL(void)
-jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
-{
-  my_src_ptr src;
-
-  /* The source object including the input buffer is made permanent so that
-   * a series of JPEG images can be read from the same file by calling
-   * jpeg_stdio_src only before the first one.  (If we discarded the buffer
-   * at the end of one image, we'd likely lose the start of the next one.)
-   * This makes it unsafe to use this manager and a different source
-   * manager serially with the same JPEG object.  Caveat programmer.
-   */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
-    cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_source_mgr));
-  }
-
-  src = (my_src_ptr) cinfo->src;
-  src->pub.init_source = init_source;
-  src->pub.fill_input_buffer = fill_input_buffer;
-  src->pub.skip_input_data = skip_input_data;
-  src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
-  src->pub.term_source = term_source;
-  src->infile = infile;
-  src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
-  src->pub.next_input_byte = NULL; /* until buffer loaded */
-}
-
-
-/*
- * Prepare for input from a supplied memory buffer.
- * The buffer must contain the whole JPEG data.
- */
-
-GLOBAL(void)
-jpeg_mem_src (j_decompress_ptr cinfo,
-	      const unsigned char * inbuffer, size_t insize)
-{
-  struct jpeg_source_mgr * src;
-
-  if (inbuffer == NULL || insize == 0)	/* Treat empty input as fatal error */
-    ERREXIT(cinfo, JERR_INPUT_EMPTY);
-
-  /* The source object is made permanent so that a series of JPEG images
-   * can be read from the same buffer by calling jpeg_mem_src only before
-   * the first one.
-   */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
-    cinfo->src = (struct jpeg_source_mgr *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(struct jpeg_source_mgr));
-  }
-
-  src = cinfo->src;
-  src->init_source = init_mem_source;
-  src->fill_input_buffer = fill_mem_input_buffer;
-  src->skip_input_data = skip_input_data;
-  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
-  src->term_source = term_source;
-  src->bytes_in_buffer = insize;
-  src->next_input_byte = (const JOCTET *) inbuffer;
-}
diff --git a/cmake/externals/libjpeg/jdcoefct.c b/cmake/externals/libjpeg/jdcoefct.c
deleted file mode 100644
index 79ba42014..000000000
--- a/cmake/externals/libjpeg/jdcoefct.c
+++ /dev/null
@@ -1,744 +0,0 @@
-/*
- * jdcoefct.c
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the coefficient buffer controller for decompression.
- * This controller is the top level of the JPEG decompressor proper.
- * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
- *
- * In buffered-image mode, this controller is the interface between
- * input-oriented processing and output-oriented processing.
- * Also, the input side (only) is used when reading a file for transcoding.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Block smoothing is only applicable for progressive JPEG, so: */
-#ifndef D_PROGRESSIVE_SUPPORTED
-#undef BLOCK_SMOOTHING_SUPPORTED
-#endif
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_coef_controller pub; /* public fields */
-
-  /* These variables keep track of the current location of the input side. */
-  /* cinfo->input_iMCU_row is also used for this. */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
-
-  /* The output side's location is represented by cinfo->output_iMCU_row. */
-
-  /* In single-pass modes, it's sufficient to buffer just one MCU.
-   * We append a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
-   * and let the entropy decoder write into that workspace each time.
-   * In multi-pass modes, this array points to the current MCU's blocks
-   * within the virtual arrays; it is used only by the input side.
-   */
-  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-  /* In multi-pass modes, we need a virtual block array for each component. */
-  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
-#endif
-
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  /* When doing block smoothing, we latch coefficient Al values here */
-  int * coef_bits_latch;
-#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
-#endif
-
-  /* Workspace for single-pass modes (omitted otherwise). */
-  JBLOCK blk_buffer[D_MAX_BLOCKS_IN_MCU];
-} my_coef_controller;
-
-typedef my_coef_controller * my_coef_ptr;
-
-
-/* Forward declarations */
-METHODDEF(int) decompress_onepass
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-METHODDEF(int) decompress_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#endif
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
-METHODDEF(int) decompress_smooth_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
-#endif
-
-
-LOCAL(void)
-start_iMCU_row (j_decompress_ptr cinfo)
-/* Reset within-iMCU-row counters for a new row (input side) */
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* In an interleaved scan, an MCU row is the same as an iMCU row.
-   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
-   * But at the bottom of the image, process only what's left.
-   */
-  if (cinfo->comps_in_scan > 1) {
-    coef->MCU_rows_per_iMCU_row = 1;
-  } else {
-    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
-    else
-      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
-  }
-
-  coef->MCU_ctr = 0;
-  coef->MCU_vert_offset = 0;
-}
-
-
-/*
- * Initialize for an input processing pass.
- */
-
-METHODDEF(void)
-start_input_pass (j_decompress_ptr cinfo)
-{
-  cinfo->input_iMCU_row = 0;
-  start_iMCU_row(cinfo);
-}
-
-
-/*
- * Initialize for an output processing pass.
- */
-
-METHODDEF(void)
-start_output_pass (j_decompress_ptr cinfo)
-{
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-
-  /* If multipass, check to see whether to use block smoothing on this pass */
-  if (coef->pub.coef_arrays != NULL) {
-    if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
-      coef->pub.decompress_data = decompress_smooth_data;
-    else
-      coef->pub.decompress_data = decompress_data;
-  }
-#endif
-  cinfo->output_iMCU_row = 0;
-}
-
-
-/*
- * Decompress and return some data in the single-pass case.
- * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
- * Input and output must run in lockstep since we have only a one-MCU buffer.
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- *
- * NB: output_buf contains a plane for each component in image,
- * which we index according to the component's SOF position.
- */
-
-METHODDEF(int)
-decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  int ci, xindex, yindex, yoffset, useful_width;
-  JBLOCKROW blkp;
-  JSAMPARRAY output_ptr;
-  JDIMENSION start_col, output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-
-  /* Loop to process as much as one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
-      blkp = coef->blk_buffer;	/* pointer to current DCT block within MCU */
-      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
-      if (cinfo->lim_Se)	/* can bypass in DC only case */
-	MEMZERO(blkp, cinfo->blocks_in_MCU * SIZEOF(JBLOCK));
-      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
-      }
-      /* Determine where data should go in output_buf and do the IDCT thing.
-       * We skip dummy blocks at the right and bottom edges (but blkp gets
-       * incremented past them!).
-       */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	/* Don't bother to IDCT an uninteresting component. */
-	if (! compptr->component_needed) {
-	  blkp += compptr->MCU_blocks;
-	  continue;
-	}
-	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
-	output_ptr = output_buf[compptr->component_index] +
-	  yoffset * compptr->DCT_v_scaled_size;
-	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						    : compptr->last_col_width;
-	start_col = MCU_col_num * compptr->MCU_sample_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (cinfo->input_iMCU_row < last_iMCU_row ||
-	      yoffset + yindex < compptr->last_row_height) {
-	    output_col = start_col;
-	    for (xindex = 0; xindex < useful_width; xindex++) {
-	      (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) (blkp + xindex),
-			      output_ptr, output_col);
-	      output_col += compptr->DCT_h_scaled_size;
-	    }
-	    output_ptr += compptr->DCT_v_scaled_size;
-	  }
-	  blkp += compptr->MCU_width;
-	}
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  cinfo->output_iMCU_row++;
-  if (++(cinfo->input_iMCU_row) <= last_iMCU_row) {
-    start_iMCU_row(cinfo);
-    return JPEG_ROW_COMPLETED;
-  }
-  /* Completed the scan */
-  (*cinfo->inputctl->finish_input_pass) (cinfo);
-  return JPEG_SCAN_COMPLETED;
-}
-
-
-/*
- * Dummy consume-input routine for single-pass operation.
- */
-
-METHODDEF(int)
-dummy_consume_data (j_decompress_ptr cinfo)
-{
-  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
-}
-
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Consume input data and store it in the full-image coefficient buffer.
- * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
- * ie, v_samp_factor block rows for each component in the scan.
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- */
-
-METHODDEF(int)
-consume_data (j_decompress_ptr cinfo)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
-  int ci, xindex, yindex, yoffset;
-  JDIMENSION start_col;
-  JBLOCKARRAY blkp;
-  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
-  JBLOCKROW buffer_ptr;
-  jpeg_component_info *compptr;
-
-  /* Align the virtual buffers for the components used in this scan. */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    buffer[ci] = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
-       cinfo->input_iMCU_row * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, TRUE);
-    /* Note: entropy decoder expects buffer to be zeroed,
-     * but this is handled automatically by the memory manager
-     * because we requested a pre-zeroed array.
-     */
-  }
-
-  /* Loop to process one whole iMCU row */
-  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
-       yoffset++) {
-    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
-      /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkp = coef->MCU_buffer;	/* pointer to current DCT block within MCU */
-      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yoffset + yindex] + start_col;
-	  xindex = compptr->MCU_width;
-	  do {
-	    *blkp++ = buffer_ptr++;
-	  } while (--xindex);
-	}
-      }
-      /* Try to fetch the MCU. */
-      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
-      }
-    }
-    /* Completed an MCU row, but perhaps not an iMCU row */
-    coef->MCU_ctr = 0;
-  }
-  /* Completed the iMCU row, advance counters for next one */
-  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
-    start_iMCU_row(cinfo);
-    return JPEG_ROW_COMPLETED;
-  }
-  /* Completed the scan */
-  (*cinfo->inputctl->finish_input_pass) (cinfo);
-  return JPEG_SCAN_COMPLETED;
-}
-
-
-/*
- * Decompress and return some data in the multi-pass case.
- * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
- * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
- *
- * NB: output_buf contains a plane for each component in image.
- */
-
-METHODDEF(int)
-decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION block_num;
-  int ci, block_row, block_rows;
-  JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr;
-  JSAMPARRAY output_ptr;
-  JDIMENSION output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-
-  /* Force some input to be done if we are getting ahead of the input. */
-  while (cinfo->input_scan_number < cinfo->output_scan_number ||
-	 (cinfo->input_scan_number == cinfo->output_scan_number &&
-	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
-    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
-      return JPEG_SUSPENDED;
-  }
-
-  /* OK, output from the virtual arrays. */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to IDCT an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Align the virtual buffer for this component. */
-    buffer = (*cinfo->mem->access_virt_barray)
-      ((j_common_ptr) cinfo, coef->whole_image[ci],
-       cinfo->output_iMCU_row * compptr->v_samp_factor,
-       (JDIMENSION) compptr->v_samp_factor, FALSE);
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row)
-      block_rows = compptr->v_samp_factor;
-    else {
-      /* NB: can't use last_row_height here; it is input-side-dependent! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-    }
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
-    output_ptr = output_buf[ci];
-    /* Loop over all DCT blocks to be processed. */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      buffer_ptr = buffer[block_row];
-      output_col = 0;
-      for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
-			output_ptr, output_col);
-	buffer_ptr++;
-	output_col += compptr->DCT_h_scaled_size;
-      }
-      output_ptr += compptr->DCT_v_scaled_size;
-    }
-  }
-
-  if (++(cinfo->output_iMCU_row) <= last_iMCU_row)
-    return JPEG_ROW_COMPLETED;
-  return JPEG_SCAN_COMPLETED;
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-
-
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-
-/*
- * This code applies interblock smoothing as described by section K.8
- * of the JPEG standard: the first 5 AC coefficients are estimated from
- * the DC values of a DCT block and its 8 neighboring blocks.
- * We apply smoothing only for progressive JPEG decoding, and only if
- * the coefficients it can estimate are not yet known to full precision.
- */
-
-/* Natural-order array positions of the first 5 zigzag-order coefficients */
-#define Q01_POS  1
-#define Q10_POS  8
-#define Q20_POS  16
-#define Q11_POS  9
-#define Q02_POS  2
-
-/*
- * Determine whether block smoothing is applicable and safe.
- * We also latch the current states of the coef_bits[] entries for the
- * AC coefficients; otherwise, if the input side of the decompressor
- * advances into a new scan, we might think the coefficients are known
- * more accurately than they really are.
- */
-
-LOCAL(boolean)
-smoothing_ok (j_decompress_ptr cinfo)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  boolean smoothing_useful = FALSE;
-  int ci, coefi;
-  jpeg_component_info *compptr;
-  JQUANT_TBL * qtable;
-  int * coef_bits;
-  int * coef_bits_latch;
-
-  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
-    return FALSE;
-
-  /* Allocate latch area if not already done */
-  if (coef->coef_bits_latch == NULL)
-    coef->coef_bits_latch = (int *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       cinfo->num_components * (SAVED_COEFS * SIZEOF(int)));
-  coef_bits_latch = coef->coef_bits_latch;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* All components' quantization values must already be latched. */
-    if ((qtable = compptr->quant_table) == NULL)
-      return FALSE;
-    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
-    if (qtable->quantval[0] == 0 ||
-	qtable->quantval[Q01_POS] == 0 ||
-	qtable->quantval[Q10_POS] == 0 ||
-	qtable->quantval[Q20_POS] == 0 ||
-	qtable->quantval[Q11_POS] == 0 ||
-	qtable->quantval[Q02_POS] == 0)
-      return FALSE;
-    /* DC values must be at least partly known for all components. */
-    coef_bits = cinfo->coef_bits[ci];
-    if (coef_bits[0] < 0)
-      return FALSE;
-    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
-    for (coefi = 1; coefi <= 5; coefi++) {
-      coef_bits_latch[coefi] = coef_bits[coefi];
-      if (coef_bits[coefi] != 0)
-	smoothing_useful = TRUE;
-    }
-    coef_bits_latch += SAVED_COEFS;
-  }
-
-  return smoothing_useful;
-}
-
-
-/*
- * Variant of decompress_data for use when doing block smoothing.
- */
-
-METHODDEF(int)
-decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
-{
-  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
-  JDIMENSION block_num, last_block_column;
-  int ci, block_row, block_rows, access_rows;
-  JBLOCKARRAY buffer;
-  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
-  JSAMPARRAY output_ptr;
-  JDIMENSION output_col;
-  jpeg_component_info *compptr;
-  inverse_DCT_method_ptr inverse_DCT;
-  boolean first_row, last_row;
-  JBLOCK workspace;
-  int *coef_bits;
-  JQUANT_TBL *quanttbl;
-  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
-  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
-  int Al, pred;
-
-  /* Force some input to be done if we are getting ahead of the input. */
-  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
-    if (cinfo->input_scan_number == cinfo->output_scan_number) {
-      /* If input is working on current scan, we ordinarily want it to
-       * have completed the current row.  But if input scan is DC,
-       * we want it to keep one row ahead so that next block row's DC
-       * values are up to date.
-       */
-      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
-      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
-	break;
-    }
-    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
-      return JPEG_SUSPENDED;
-  }
-
-  /* OK, output from the virtual arrays. */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to IDCT an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Count non-dummy DCT block rows in this iMCU row. */
-    if (cinfo->output_iMCU_row < last_iMCU_row) {
-      block_rows = compptr->v_samp_factor;
-      access_rows = block_rows * 2; /* this and next iMCU row */
-      last_row = FALSE;
-    } else {
-      /* NB: can't use last_row_height here; it is input-side-dependent! */
-      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-      if (block_rows == 0) block_rows = compptr->v_samp_factor;
-      access_rows = block_rows; /* this iMCU row only */
-      last_row = TRUE;
-    }
-    /* Align the virtual buffer for this component. */
-    if (cinfo->output_iMCU_row > 0) {
-      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
-      buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
-	 (JDIMENSION) access_rows, FALSE);
-      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
-      first_row = FALSE;
-    } else {
-      buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
-      first_row = TRUE;
-    }
-    /* Fetch component-dependent info */
-    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
-    quanttbl = compptr->quant_table;
-    Q00 = quanttbl->quantval[0];
-    Q01 = quanttbl->quantval[Q01_POS];
-    Q10 = quanttbl->quantval[Q10_POS];
-    Q20 = quanttbl->quantval[Q20_POS];
-    Q11 = quanttbl->quantval[Q11_POS];
-    Q02 = quanttbl->quantval[Q02_POS];
-    inverse_DCT = cinfo->idct->inverse_DCT[ci];
-    output_ptr = output_buf[ci];
-    /* Loop over all DCT blocks to be processed. */
-    for (block_row = 0; block_row < block_rows; block_row++) {
-      buffer_ptr = buffer[block_row];
-      if (first_row && block_row == 0)
-	prev_block_row = buffer_ptr;
-      else
-	prev_block_row = buffer[block_row-1];
-      if (last_row && block_row == block_rows-1)
-	next_block_row = buffer_ptr;
-      else
-	next_block_row = buffer[block_row+1];
-      /* We fetch the surrounding DC values using a sliding-register approach.
-       * Initialize all nine here so as to do the right thing on narrow pics.
-       */
-      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
-      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
-      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
-      output_col = 0;
-      last_block_column = compptr->width_in_blocks - 1;
-      for (block_num = 0; block_num <= last_block_column; block_num++) {
-	/* Fetch current DCT block into workspace so we can modify it. */
-	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
-	/* Update DC values */
-	if (block_num < last_block_column) {
-	  DC3 = (int) prev_block_row[1][0];
-	  DC6 = (int) buffer_ptr[1][0];
-	  DC9 = (int) next_block_row[1][0];
-	}
-	/* Compute coefficient estimates per K.8.
-	 * An estimate is applied only if coefficient is still zero,
-	 * and is not known to be fully accurate.
-	 */
-	/* AC01 */
-	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
-	  num = 36 * Q00 * (DC4 - DC6);
-	  if (num >= 0) {
-	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[1] = (JCOEF) pred;
-	}
-	/* AC10 */
-	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
-	  num = 36 * Q00 * (DC2 - DC8);
-	  if (num >= 0) {
-	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[8] = (JCOEF) pred;
-	}
-	/* AC20 */
-	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
-	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[16] = (JCOEF) pred;
-	}
-	/* AC11 */
-	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
-	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
-	  if (num >= 0) {
-	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[9] = (JCOEF) pred;
-	}
-	/* AC02 */
-	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
-	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[2] = (JCOEF) pred;
-	}
-	/* OK, do the IDCT */
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
-			output_ptr, output_col);
-	/* Advance for next column */
-	DC1 = DC2; DC2 = DC3;
-	DC4 = DC5; DC5 = DC6;
-	DC7 = DC8; DC8 = DC9;
-	buffer_ptr++, prev_block_row++, next_block_row++;
-	output_col += compptr->DCT_h_scaled_size;
-      }
-      output_ptr += compptr->DCT_v_scaled_size;
-    }
-  }
-
-  if (++(cinfo->output_iMCU_row) <= last_iMCU_row)
-    return JPEG_ROW_COMPLETED;
-  return JPEG_SCAN_COMPLETED;
-}
-
-#endif /* BLOCK_SMOOTHING_SUPPORTED */
-
-
-/*
- * Initialize coefficient buffer controller.
- */
-
-GLOBAL(void)
-jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_coef_ptr coef;
-
-  if (need_full_buffer) {
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-    /* Allocate a full-image virtual array for each component, */
-    /* padded to a multiple of samp_factor DCT blocks in each direction. */
-    /* Note we ask for a pre-zeroed array. */
-    int ci, access_rows;
-    jpeg_component_info *compptr;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       SIZEOF(my_coef_controller) - SIZEOF(coef->blk_buffer));
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      access_rows = compptr->v_samp_factor;
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-      /* If block smoothing could be used, need a bigger window */
-      if (cinfo->progressive_mode)
-	access_rows *= 3;
-#endif
-      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) access_rows);
-    }
-    coef->pub.consume_data = consume_data;
-    coef->pub.decompress_data = decompress_data;
-    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-  } else {
-    /* We only need a single-MCU buffer. */
-    JBLOCKARRAY blkp;
-    JBLOCKROW buffer_ptr;
-    int bi;
-
-    coef = (my_coef_ptr) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_coef_controller));
-    buffer_ptr = coef->blk_buffer;
-    if (cinfo->lim_Se == 0)	/* DC only case: want to bypass later */
-      MEMZERO(buffer_ptr, SIZEOF(coef->blk_buffer));
-    blkp = coef->MCU_buffer;
-    bi = D_MAX_BLOCKS_IN_MCU;
-    do {
-      *blkp++ = buffer_ptr++;
-    } while (--bi);
-    coef->pub.consume_data = dummy_consume_data;
-    coef->pub.decompress_data = decompress_onepass;
-    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
-  }
-
-  coef->pub.start_input_pass = start_input_pass;
-  coef->pub.start_output_pass = start_output_pass;
-#ifdef BLOCK_SMOOTHING_SUPPORTED
-  coef->coef_bits_latch = NULL;
-#endif
-  cinfo->coef = &coef->pub;
-}
diff --git a/cmake/externals/libjpeg/jdcolor.c b/cmake/externals/libjpeg/jdcolor.c
deleted file mode 100644
index 6b40fb534..000000000
--- a/cmake/externals/libjpeg/jdcolor.c
+++ /dev/null
@@ -1,769 +0,0 @@
-/*
- * jdcolor.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains output colorspace conversion routines.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-#if RANGE_BITS < 2
-  /* Deliberate syntax err */
-  Sorry, this code requires 2 or more range extension bits.
-#endif
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_deconverter pub; /* public fields */
-
-  /* Private state for YCbCr->RGB and BG_YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
-
-  /* Private state for RGB->Y conversion */
-  INT32 * R_y_tab;		/* => table for R to Y conversion */
-  INT32 * G_y_tab;		/* => table for G to Y conversion */
-  INT32 * B_y_tab;		/* => table for B to Y conversion */
-} my_color_deconverter;
-
-typedef my_color_deconverter * my_cconvert_ptr;
-
-
-/***************  YCbCr -> RGB conversion: most common case **************/
-/*************** BG_YCC -> RGB conversion: less common case **************/
-/***************    RGB -> Y   conversion: less common case **************/
-
-/*
- * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
- * previously known as Recommendation CCIR 601-1, except that Cb and Cr
- * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
- * sYCC (standard luma-chroma-chroma color space with extended gamut)
- * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
- * bg-sRGB and bg-sYCC (big gamut standard color spaces)
- * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
- * Note that the derived conversion coefficients given in some of these
- * documents are imprecise.  The general conversion equations are
- *
- *	R = Y + K * (1 - Kr) * Cr
- *	G = Y - K * (Kb * (1 - Kb) * Cb + Kr * (1 - Kr) * Cr) / (1 - Kr - Kb)
- *	B = Y + K * (1 - Kb) * Cb
- *
- *	Y = Kr * R + (1 - Kr - Kb) * G + Kb * B
- *
- * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
- * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
- * the conversion equations to be implemented are therefore
- *
- *	R = Y + 1.402 * Cr
- *	G = Y - 0.344136286 * Cb - 0.714136286 * Cr
- *	B = Y + 1.772 * Cb
- *
- *	Y = 0.299 * R + 0.587 * G + 0.114 * B
- *
- * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
- * For bg-sYCC, with K = 4, the equations are
- *
- *	R = Y + 2.804 * Cr
- *	G = Y - 0.688272572 * Cb - 1.428272572 * Cr
- *	B = Y + 3.544 * Cb
- *
- * To avoid floating-point arithmetic, we represent the fractional constants
- * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
- * the products by 2^16, with appropriate rounding, to get the correct answer.
- * Notice that Y, being an integral input, does not contribute any fraction
- * so it need not participate in the rounding.
- *
- * For even more speed, we avoid doing any multiplications in the inner loop
- * by precalculating the constants times Cb and Cr for all possible values.
- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 9-bit to 12-bit samples it is still acceptable.  It's not very
- * reasonable for 16-bit samples, but if you want lossless storage
- * you shouldn't be changing colorspace anyway.
- * The Cr=>R and Cb=>B values can be rounded to integers in advance;
- * the values for the G calculation are left scaled up,
- * since we must add them together before rounding.
- */
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-
-/*
- * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
- */
-
-LOCAL(void)
-build_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Normal case, sYCC */
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.402 * x */
-    cconvert->Cr_r_tab[i] = (int) DESCALE(FIX(1.402) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.772 * x */
-    cconvert->Cb_b_tab[i] = (int) DESCALE(FIX(1.772) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.714136286 * x */
-    cconvert->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
-    /* Cb=>G value is scaled-up -0.344136286 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    cconvert->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
-  }
-}
-
-
-LOCAL(void)
-build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Wide gamut case, bg-sYCC */
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  cconvert->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 2.804 * x */
-    cconvert->Cr_r_tab[i] = (int) DESCALE(FIX(2.804) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 3.544 * x */
-    cconvert->Cb_b_tab[i] = (int) DESCALE(FIX(3.544) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -1.428272572 * x */
-    cconvert->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
-    /* Cb=>G value is scaled-up -0.688272572 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    cconvert->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- *
- * Note that we change from noninterleaved, one-plane-per-component format
- * to interleaved-pixel format.  The output buffer is therefore three times
- * as wide as the input buffer.
- *
- * A starting row offset is provided only for the input buffer.  The caller
- * can easily adjust the passed output_buf value to accommodate any row
- * offset required on that side.
- */
-
-METHODDEF(void)
-ycc_rgb_convert (j_decompress_ptr cinfo,
-		 JSAMPIMAGE input_buf, JDIMENSION input_row,
-		 JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses,
-       * for extended gamut (sYCC) and wide gamut (bg-sYCC) encodings.
-       */
-      outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
-      outptr[RGB_GREEN] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
-      outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/**************** Cases other than YCC -> RGB ****************/
-
-
-/*
- * Initialize for RGB->grayscale colorspace conversion.
- */
-
-LOCAL(void)
-build_rgb_y_table (j_decompress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 i;
-
-  cconvert->R_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->G_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  cconvert->B_y_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0; i <= MAXJSAMPLE; i++) {
-    cconvert->R_y_tab[i] = FIX(0.299) * i;
-    cconvert->G_y_tab[i] = FIX(0.587) * i;
-    cconvert->B_y_tab[i] = FIX(0.114) * i + ONE_HALF;
-  }
-}
-
-
-/*
- * Convert RGB to grayscale.
- */
-
-METHODDEF(void)
-rgb_gray_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register INT32 y;
-  register INT32 * Rytab = cconvert->R_y_tab;
-  register INT32 * Gytab = cconvert->G_y_tab;
-  register INT32 * Bytab = cconvert->B_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = Rytab[GETJSAMPLE(inptr0[col])];
-      y += Gytab[GETJSAMPLE(inptr1[col])];
-      y += Bytab[GETJSAMPLE(inptr2[col])];
-      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- * [R-G,G,B-G] to [R,G,B] conversion with modulo calculation
- * (inverse color transform).
- * This can be seen as an adaption of the general YCbCr->RGB
- * conversion equation with Kr = Kb = 0, while replacing the
- * normalization by modulo calculation.
- */
-
-METHODDEF(void)
-rgb1_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  register int r, g, b;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr0[col]);
-      g = GETJSAMPLE(inptr1[col]);
-      b = GETJSAMPLE(inptr2[col]);
-      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
-       * (modulo) operator is equivalent to the bitmask operator AND.
-       */
-      outptr[RGB_RED]   = (JSAMPLE) ((r + g - CENTERJSAMPLE) & MAXJSAMPLE);
-      outptr[RGB_GREEN] = (JSAMPLE) g;
-      outptr[RGB_BLUE]  = (JSAMPLE) ((b + g - CENTERJSAMPLE) & MAXJSAMPLE);
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * [R-G,G,B-G] to grayscale conversion with modulo calculation
- * (inverse color transform).
- */
-
-METHODDEF(void)
-rgb1_gray_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 y;
-  register INT32 * Rytab = cconvert->R_y_tab;
-  register INT32 * Gytab = cconvert->G_y_tab;
-  register INT32 * Bytab = cconvert->B_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr0[col]);
-      g = GETJSAMPLE(inptr1[col]);
-      b = GETJSAMPLE(inptr2[col]);
-      /* Assume that MAXJSAMPLE+1 is a power of 2, so that the MOD
-       * (modulo) operator is equivalent to the bitmask operator AND.
-       */
-      y  = Rytab[(r + g - CENTERJSAMPLE) & MAXJSAMPLE];
-      y += Gytab[g];
-      y += Bytab[(b + g - CENTERJSAMPLE) & MAXJSAMPLE];
-      outptr[col] = (JSAMPLE) (y >> SCALEBITS);
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- * No colorspace change, but conversion from separate-planes
- * to interleaved representation.
- */
-
-METHODDEF(void)
-rgb_convert (j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION input_row,
-	     JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[RGB_RED]   = inptr0[col];
-      outptr[RGB_GREEN] = inptr1[col];
-      outptr[RGB_BLUE]  = inptr2[col];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Color conversion for no colorspace change: just copy the data,
- * converting from separate-planes to interleaved representation.
- * Note: Omit uninteresting components in output buffer.
- */
-
-METHODDEF(void)
-null_convert (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION input_row,
-	      JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW outptr;
-  register JSAMPROW inptr;
-  register JDIMENSION count;
-  register int out_comps = cinfo->out_color_components;
-  JDIMENSION num_cols = cinfo->output_width;
-  JSAMPROW startptr;
-  int ci;
-  jpeg_component_info *compptr;
-
-  while (--num_rows >= 0) {
-    /* It seems fastest to make a separate pass for each component. */
-    startptr = *output_buf++;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (! compptr->component_needed)
-	continue;		/* skip uninteresting component */
-      inptr = input_buf[ci][input_row];
-      outptr = startptr++;
-      for (count = num_cols; count > 0; count--) {
-	*outptr = *inptr++;	/* don't need GETJSAMPLE() here */
-	outptr += out_comps;
-      }
-    }
-    input_row++;
-  }
-}
-
-
-/*
- * Color conversion for grayscale: just copy the data.
- * This also works for YCC -> grayscale conversion, in which
- * we just copy the Y (luminance) component and ignore chrominance.
- */
-
-METHODDEF(void)
-grayscale_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  jcopy_sample_rows(input_buf[0] + input_row, output_buf,
-		    num_rows, cinfo->output_width);
-}
-
-
-/*
- * Convert grayscale to RGB: just duplicate the graylevel three times.
- * This is provided to support applications that don't want to cope
- * with grayscale as a separate case.
- */
-
-METHODDEF(void)
-gray_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  register JSAMPROW outptr;
-  register JSAMPROW inptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr = input_buf[0][input_row++];
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
-      outptr += RGB_PIXELSIZE;
-    }
-  }
-}
-
-
-/*
- * Convert some rows of samples to the output colorspace.
- * This version handles Adobe-style YCCK->CMYK conversion,
- * where we convert YCbCr to R=1-C, G=1-M, and B=1-Y using the
- * same conversion as above, while passing K (black) unchanged.
- * We assume build_ycc_rgb_table has been called.
- */
-
-METHODDEF(void)
-ycck_cmyk_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    inptr3 = input_buf[3][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses,
-       * and for extended gamut encodings (sYCC).
-       */
-      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
-      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS)))];
-      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
-      /* K passes through unchanged */
-      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
-      outptr += 4;
-    }
-  }
-}
-
-
-/*
- * Convert CMYK to YK part of YCCK for colorless output.
- * We assume build_rgb_y_table has been called.
- */
-
-METHODDEF(void)
-cmyk_yk_convert (j_decompress_ptr cinfo,
-		 JSAMPIMAGE input_buf, JDIMENSION input_row,
-		 JSAMPARRAY output_buf, int num_rows)
-{
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register INT32 y;
-  register INT32 * Rytab = cconvert->R_y_tab;
-  register INT32 * Gytab = cconvert->G_y_tab;
-  register INT32 * Bytab = cconvert->B_y_tab;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    inptr3 = input_buf[3][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = Rytab[MAXJSAMPLE - GETJSAMPLE(inptr0[col])];
-      y += Gytab[MAXJSAMPLE - GETJSAMPLE(inptr1[col])];
-      y += Bytab[MAXJSAMPLE - GETJSAMPLE(inptr2[col])];
-      outptr[0] = (JSAMPLE) (y >> SCALEBITS);
-      /* K passes through unchanged */
-      outptr[1] = inptr3[col];	/* don't need GETJSAMPLE here */
-      outptr += 2;
-    }
-  }
-}
-
-
-/*
- * Empty method for start_pass.
- */
-
-METHODDEF(void)
-start_pass_dcolor (j_decompress_ptr cinfo)
-{
-  /* no work needed */
-}
-
-
-/*
- * Module initialization routine for output colorspace conversion.
- */
-
-GLOBAL(void)
-jinit_color_deconverter (j_decompress_ptr cinfo)
-{
-  my_cconvert_ptr cconvert;
-  int ci, i;
-
-  cconvert = (my_cconvert_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_color_deconverter));
-  cinfo->cconvert = &cconvert->pub;
-  cconvert->pub.start_pass = start_pass_dcolor;
-
-  /* Make sure num_components agrees with jpeg_color_space */
-  switch (cinfo->jpeg_color_space) {
-  case JCS_GRAYSCALE:
-    if (cinfo->num_components != 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  case JCS_RGB:
-  case JCS_YCbCr:
-  case JCS_BG_RGB:
-  case JCS_BG_YCC:
-    if (cinfo->num_components != 3)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  case JCS_CMYK:
-  case JCS_YCCK:
-    if (cinfo->num_components != 4)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    break;
-
-  default:			/* JCS_UNKNOWN can be anything */
-    if (cinfo->num_components < 1)
-      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-  }
-
-  /* Support color transform only for RGB colorspaces */
-  if (cinfo->color_transform &&
-      cinfo->jpeg_color_space != JCS_RGB &&
-      cinfo->jpeg_color_space != JCS_BG_RGB)
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-
-  /* Set out_color_components and conversion method based on requested space.
-   * Also adjust the component_needed flags for any unused components,
-   * so that earlier pipeline stages can avoid useless computation.
-   */
-
-  switch (cinfo->out_color_space) {
-  case JCS_GRAYSCALE:
-    cinfo->out_color_components = 1;
-    switch (cinfo->jpeg_color_space) {
-    case JCS_GRAYSCALE:
-    case JCS_YCbCr:
-    case JCS_BG_YCC:
-      cconvert->pub.color_convert = grayscale_convert;
-      /* For color->grayscale conversion, only the Y (0) component is needed */
-      for (ci = 1; ci < cinfo->num_components; ci++)
-	cinfo->comp_info[ci].component_needed = FALSE;
-      break;
-    case JCS_RGB:
-      switch (cinfo->color_transform) {
-      case JCT_NONE:
-	cconvert->pub.color_convert = rgb_gray_convert;
-	break;
-      case JCT_SUBTRACT_GREEN:
-	cconvert->pub.color_convert = rgb1_gray_convert;
-	break;
-      default:
-	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-      }
-      build_rgb_y_table(cinfo);
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_RGB:
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    switch (cinfo->jpeg_color_space) {
-    case JCS_GRAYSCALE:
-      cconvert->pub.color_convert = gray_rgb_convert;
-      break;
-    case JCS_YCbCr:
-      cconvert->pub.color_convert = ycc_rgb_convert;
-      build_ycc_rgb_table(cinfo);
-      break;
-    case JCS_BG_YCC:
-      cconvert->pub.color_convert = ycc_rgb_convert;
-      build_bg_ycc_rgb_table(cinfo);
-      break;
-    case JCS_RGB:
-      switch (cinfo->color_transform) {
-      case JCT_NONE:
-	cconvert->pub.color_convert = rgb_convert;
-	break;
-      case JCT_SUBTRACT_GREEN:
-	cconvert->pub.color_convert = rgb1_rgb_convert;
-	break;
-      default:
-	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-      }
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_BG_RGB:
-    if (cinfo->jpeg_color_space != JCS_BG_RGB)
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    switch (cinfo->color_transform) {
-    case JCT_NONE:
-      cconvert->pub.color_convert = rgb_convert;
-      break;
-    case JCT_SUBTRACT_GREEN:
-      cconvert->pub.color_convert = rgb1_rgb_convert;
-      break;
-    default:
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    }
-    break;
-
-  case JCS_CMYK:
-    if (cinfo->jpeg_color_space != JCS_YCCK)
-      goto def_label;
-    cinfo->out_color_components = 4;
-    cconvert->pub.color_convert = ycck_cmyk_convert;
-    build_ycc_rgb_table(cinfo);
-    break;
-
-  case JCS_YCCK:
-    if (cinfo->jpeg_color_space != JCS_CMYK ||
-	/* Support only YK part of YCCK for colorless output */
-	! cinfo->comp_info[0].component_needed ||
-	  cinfo->comp_info[1].component_needed ||
-	  cinfo->comp_info[2].component_needed ||
-	! cinfo->comp_info[3].component_needed)
-      goto def_label;
-    cinfo->out_color_components = 2;
-    /* Need all components on input side */
-    cinfo->comp_info[1].component_needed = TRUE;
-    cinfo->comp_info[2].component_needed = TRUE;
-    cconvert->pub.color_convert = cmyk_yk_convert;
-    build_rgb_y_table(cinfo);
-    break;
-
-  default: def_label:	/* permit null conversion to same output space */
-    if (cinfo->out_color_space != cinfo->jpeg_color_space)
-      /* unsupported non-null conversion */
-      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    i = 0;
-    for (ci = 0; ci < cinfo->num_components; ci++)
-      if (cinfo->comp_info[ci].component_needed)
-	i++;		/* count output color components */
-    cinfo->out_color_components = i;
-    cconvert->pub.color_convert = null_convert;
-  }
-
-  if (cinfo->quantize_colors)
-    cinfo->output_components = 1; /* single colormapped output component */
-  else
-    cinfo->output_components = cinfo->out_color_components;
-}
diff --git a/cmake/externals/libjpeg/jdct.h b/cmake/externals/libjpeg/jdct.h
deleted file mode 100644
index 0f251590c..000000000
--- a/cmake/externals/libjpeg/jdct.h
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * jdct.h
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2023 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This include file contains common declarations for the forward and
- * inverse DCT modules.  These declarations are private to the DCT managers
- * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
- * The individual DCT algorithms are kept in separate files to ease 
- * machine-dependent tuning (e.g., assembly coding).
- */
-
-
-/*
- * A forward DCT routine is given a pointer to an input sample array and
- * a pointer to a work area of type DCTELEM[]; the DCT is to be performed
- * in-place in that buffer.  Type DCTELEM is int for 8-bit samples, INT32
- * for 12-bit samples.  (NOTE: Floating-point DCT implementations use an
- * array of type FAST_FLOAT, instead.)
- * The input data is to be fetched from the sample array starting at a
- * specified column.  (Any row offset needed will be applied to the array
- * pointer before it is passed to the FDCT code.)
- * Note that the number of samples fetched by the FDCT routine is
- * DCT_h_scaled_size * DCT_v_scaled_size.
- * The DCT outputs are returned scaled up by a factor of 8; they therefore
- * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
- * convention improves accuracy in integer implementations and saves some
- * work in floating-point ones.
- * Quantization of the output coefficients is done by jcdctmgr.c.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef int DCTELEM;		/* 16 or 32 bits is fine */
-#else
-typedef INT32 DCTELEM;		/* must have 32 bits */
-#endif
-
-typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data,
-					       JSAMPARRAY sample_data,
-					       JDIMENSION start_col));
-typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data,
-					     JSAMPARRAY sample_data,
-					     JDIMENSION start_col));
-
-
-/*
- * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
- * to an output sample array.  The routine must dequantize the input data as
- * well as perform the IDCT; for dequantization, it uses the multiplier table
- * pointed to by compptr->dct_table.  The output data is to be placed into the
- * sample array starting at a specified column.  (Any row offset needed will
- * be applied to the array pointer before it is passed to the IDCT code.)
- * Note that the number of samples emitted by the IDCT routine is
- * DCT_h_scaled_size * DCT_v_scaled_size.
- */
-
-/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
-
-/*
- * Each IDCT routine has its own ideas about the best dct_table element type.
- */
-
-typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
-#if BITS_IN_JSAMPLE == 8
-typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
-#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
-#else
-typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
-#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
-#endif
-typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
-
-
-/*
- * Each IDCT routine is responsible for range-limiting its results and
- * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
- * be quite far out of range if the input data is corrupt, so a bulletproof
- * range-limiting step is required.  We use a mask-and-table-lookup method
- * to do the combined operations quickly, assuming that RANGE_CENTER
- * (defined in jpegint.h) is a power of 2.  See the comments with
- * prepare_range_limit_table (in jdmaster.c) for more info.
- */
-
-#define RANGE_MASK  (RANGE_CENTER * 2 - 1)
-#define RANGE_SUBSET  (RANGE_CENTER - CENTERJSAMPLE)
-
-#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit - RANGE_SUBSET)
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_fdct_islow		jFDislow
-#define jpeg_fdct_ifast		jFDifast
-#define jpeg_fdct_float		jFDfloat
-#define jpeg_fdct_7x7		jFD7x7
-#define jpeg_fdct_6x6		jFD6x6
-#define jpeg_fdct_5x5		jFD5x5
-#define jpeg_fdct_4x4		jFD4x4
-#define jpeg_fdct_3x3		jFD3x3
-#define jpeg_fdct_2x2		jFD2x2
-#define jpeg_fdct_1x1		jFD1x1
-#define jpeg_fdct_9x9		jFD9x9
-#define jpeg_fdct_10x10		jFD10x10
-#define jpeg_fdct_11x11		jFD11x11
-#define jpeg_fdct_12x12		jFD12x12
-#define jpeg_fdct_13x13		jFD13x13
-#define jpeg_fdct_14x14		jFD14x14
-#define jpeg_fdct_15x15		jFD15x15
-#define jpeg_fdct_16x16		jFD16x16
-#define jpeg_fdct_16x8		jFD16x8
-#define jpeg_fdct_14x7		jFD14x7
-#define jpeg_fdct_12x6		jFD12x6
-#define jpeg_fdct_10x5		jFD10x5
-#define jpeg_fdct_8x4		jFD8x4
-#define jpeg_fdct_6x3		jFD6x3
-#define jpeg_fdct_4x2		jFD4x2
-#define jpeg_fdct_2x1		jFD2x1
-#define jpeg_fdct_8x16		jFD8x16
-#define jpeg_fdct_7x14		jFD7x14
-#define jpeg_fdct_6x12		jFD6x12
-#define jpeg_fdct_5x10		jFD5x10
-#define jpeg_fdct_4x8		jFD4x8
-#define jpeg_fdct_3x6		jFD3x6
-#define jpeg_fdct_2x4		jFD2x4
-#define jpeg_fdct_1x2		jFD1x2
-#define jpeg_idct_islow		jRDislow
-#define jpeg_idct_ifast		jRDifast
-#define jpeg_idct_float		jRDfloat
-#define jpeg_idct_7x7		jRD7x7
-#define jpeg_idct_6x6		jRD6x6
-#define jpeg_idct_5x5		jRD5x5
-#define jpeg_idct_4x4		jRD4x4
-#define jpeg_idct_3x3		jRD3x3
-#define jpeg_idct_2x2		jRD2x2
-#define jpeg_idct_1x1		jRD1x1
-#define jpeg_idct_9x9		jRD9x9
-#define jpeg_idct_10x10		jRD10x10
-#define jpeg_idct_11x11		jRD11x11
-#define jpeg_idct_12x12		jRD12x12
-#define jpeg_idct_13x13		jRD13x13
-#define jpeg_idct_14x14		jRD14x14
-#define jpeg_idct_15x15		jRD15x15
-#define jpeg_idct_16x16		jRD16x16
-#define jpeg_idct_16x8		jRD16x8
-#define jpeg_idct_14x7		jRD14x7
-#define jpeg_idct_12x6		jRD12x6
-#define jpeg_idct_10x5		jRD10x5
-#define jpeg_idct_8x4		jRD8x4
-#define jpeg_idct_6x3		jRD6x3
-#define jpeg_idct_4x2		jRD4x2
-#define jpeg_idct_2x1		jRD2x1
-#define jpeg_idct_8x16		jRD8x16
-#define jpeg_idct_7x14		jRD7x14
-#define jpeg_idct_6x12		jRD6x12
-#define jpeg_idct_5x10		jRD5x10
-#define jpeg_idct_4x8		jRD4x8
-#define jpeg_idct_3x6		jRD3x6
-#define jpeg_idct_2x4		jRD2x4
-#define jpeg_idct_1x2		jRD1x2
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-/* Extern declarations for the forward and inverse DCT routines. */
-
-EXTERN(void) jpeg_fdct_islow
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_ifast
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_float
-    JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_7x7
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_5x5
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_3x3
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_1x1
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_9x9
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_10x10
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_11x11
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_12x12
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_13x13
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_14x14
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_15x15
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_16x16
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_16x8
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_14x7
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_12x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_10x5
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_8x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x3
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x1
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_8x16
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_7x14
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_6x12
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_5x10
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_4x8
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_3x6
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_2x4
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-EXTERN(void) jpeg_fdct_1x2
-    JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col));
-
-EXTERN(void) jpeg_idct_islow
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_ifast
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_float
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_7x7
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_5x5
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_3x3
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_1x1
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_9x9
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_10x10
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_11x11
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_12x12
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_13x13
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_14x14
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_15x15
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_16x16
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_16x8
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_14x7
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_12x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_10x5
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_8x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x3
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x1
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_8x16
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_7x14
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_6x12
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_5x10
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_4x8
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_3x6
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_2x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-EXTERN(void) jpeg_idct_1x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
-
-
-/*
- * Macros for handling fixed-point arithmetic; these are used by many
- * but not all of the DCT/IDCT modules.
- *
- * All values are expected to be of type INT32.
- * Fractional constants are scaled left by CONST_BITS bits.
- * CONST_BITS is defined within each module using these macros,
- * and may differ from one module to the next.
- */
-
-#define ONE	((INT32) 1)
-#define CONST_SCALE (ONE << CONST_BITS)
-
-/* Convert a positive real constant to an integer scaled by CONST_SCALE.
- * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
- * thus causing a lot of useless floating-point operations at run time.
- */
-
-#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * This macro is used only when the two inputs will actually be no more than
- * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
- * full 32x32 multiply.  This provides a useful speedup on many machines.
- * Unfortunately there is no way to specify a 16x16->32 multiply portably
- * in C, but some C compilers will do the right thing if you provide the
- * correct combination of casts.
- */
-
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
-#endif
-#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
-#endif
-
-#ifndef MULTIPLY16C16		/* default definition */
-#define MULTIPLY16C16(var,const)  ((var) * (const))
-#endif
-
-/* Same except both inputs are variables. */
-
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
-#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
-#endif
-
-#ifndef MULTIPLY16V16		/* default definition */
-#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
-#endif
-
-/* Like RIGHT_SHIFT, but applies to a DCTELEM.
- * We assume that int right shift is unsigned if INT32 right shift is.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	DCTELEM ishift_temp;
-#if BITS_IN_JSAMPLE == 8
-#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
-#else
-#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
-#endif
-#define IRIGHT_SHIFT(x,shft)  \
-    ((ishift_temp = (x)) < 0 ? \
-     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
-     (ishift_temp >> (shft)))
-#else
-#define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
diff --git a/cmake/externals/libjpeg/jddctmgr.c b/cmake/externals/libjpeg/jddctmgr.c
deleted file mode 100644
index 9ecfbb510..000000000
--- a/cmake/externals/libjpeg/jddctmgr.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * jddctmgr.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2013 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the inverse-DCT management logic.
- * This code selects a particular IDCT implementation to be used,
- * and it performs related housekeeping chores.  No code in this file
- * is executed per IDCT step, only during output pass setup.
- *
- * Note that the IDCT routines are responsible for performing coefficient
- * dequantization as well as the IDCT proper.  This module sets up the
- * dequantization multiplier table needed by the IDCT routine.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-
-/*
- * The decompressor input side (jdinput.c) saves away the appropriate
- * quantization table for each component at the start of the first scan
- * involving that component.  (This is necessary in order to correctly
- * decode files that reuse Q-table slots.)
- * When we are ready to make an output pass, the saved Q-table is converted
- * to a multiplier table that will actually be used by the IDCT routine.
- * The multiplier table contents are IDCT-method-dependent.  To support
- * application changes in IDCT method between scans, we can remake the
- * multiplier tables if necessary.
- * In buffered-image mode, the first output pass may occur before any data
- * has been seen for some components, and thus before their Q-tables have
- * been saved away.  To handle this case, multiplier tables are preset
- * to zeroes; the result of the IDCT will be a neutral gray level.
- */
-
-
-/* Private subobject for this module */
-
-typedef struct {
-  struct jpeg_inverse_dct pub;	/* public fields */
-
-  /* This array contains the IDCT method code that each multiplier table
-   * is currently set up for, or -1 if it's not yet set up.
-   * The actual multiplier tables are pointed to by dct_table in the
-   * per-component comp_info structures.
-   */
-  int cur_method[MAX_COMPONENTS];
-} my_idct_controller;
-
-typedef my_idct_controller * my_idct_ptr;
-
-
-/* Allocated multiplier tables: big enough for any supported variant */
-
-typedef union {
-  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
-#ifdef DCT_IFAST_SUPPORTED
-  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-  FLOAT_MULT_TYPE float_array[DCTSIZE2];
-#endif
-} multiplier_table;
-
-
-/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
- * so be sure to compile that code if either ISLOW or SCALING is requested.
- */
-#ifdef DCT_ISLOW_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#else
-#ifdef IDCT_SCALING_SUPPORTED
-#define PROVIDE_ISLOW_TABLES
-#endif
-#endif
-
-
-/*
- * Prepare for an output pass.
- * Here we select the proper IDCT routine for each component and build
- * a matching multiplier table.
- */
-
-METHODDEF(void)
-start_pass (j_decompress_ptr cinfo)
-{
-  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
-  int ci, i;
-  jpeg_component_info *compptr;
-  int method = 0;
-  inverse_DCT_method_ptr method_ptr = NULL;
-  JQUANT_TBL * qtbl;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Select the proper IDCT routine for this component's scaling */
-    switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
-#ifdef IDCT_SCALING_SUPPORTED
-    case ((1 << 8) + 1):
-      method_ptr = jpeg_idct_1x1;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 2):
-      method_ptr = jpeg_idct_2x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((3 << 8) + 3):
-      method_ptr = jpeg_idct_3x3;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 4):
-      method_ptr = jpeg_idct_4x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((5 << 8) + 5):
-      method_ptr = jpeg_idct_5x5;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 6):
-      method_ptr = jpeg_idct_6x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((7 << 8) + 7):
-      method_ptr = jpeg_idct_7x7;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((9 << 8) + 9):
-      method_ptr = jpeg_idct_9x9;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((10 << 8) + 10):
-      method_ptr = jpeg_idct_10x10;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((11 << 8) + 11):
-      method_ptr = jpeg_idct_11x11;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((12 << 8) + 12):
-      method_ptr = jpeg_idct_12x12;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((13 << 8) + 13):
-      method_ptr = jpeg_idct_13x13;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((14 << 8) + 14):
-      method_ptr = jpeg_idct_14x14;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((15 << 8) + 15):
-      method_ptr = jpeg_idct_15x15;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((16 << 8) + 16):
-      method_ptr = jpeg_idct_16x16;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((16 << 8) + 8):
-      method_ptr = jpeg_idct_16x8;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((14 << 8) + 7):
-      method_ptr = jpeg_idct_14x7;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((12 << 8) + 6):
-      method_ptr = jpeg_idct_12x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((10 << 8) + 5):
-      method_ptr = jpeg_idct_10x5;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((8 << 8) + 4):
-      method_ptr = jpeg_idct_8x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 3):
-      method_ptr = jpeg_idct_6x3;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 2):
-      method_ptr = jpeg_idct_4x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 1):
-      method_ptr = jpeg_idct_2x1;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((8 << 8) + 16):
-      method_ptr = jpeg_idct_8x16;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((7 << 8) + 14):
-      method_ptr = jpeg_idct_7x14;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((6 << 8) + 12):
-      method_ptr = jpeg_idct_6x12;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((5 << 8) + 10):
-      method_ptr = jpeg_idct_5x10;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((4 << 8) + 8):
-      method_ptr = jpeg_idct_4x8;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((3 << 8) + 6):
-      method_ptr = jpeg_idct_3x6;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((2 << 8) + 4):
-      method_ptr = jpeg_idct_2x4;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-    case ((1 << 8) + 2):
-      method_ptr = jpeg_idct_1x2;
-      method = JDCT_ISLOW;	/* jidctint uses islow-style table */
-      break;
-#endif
-    case ((DCTSIZE << 8) + DCTSIZE):
-      switch (cinfo->dct_method) {
-#ifdef DCT_ISLOW_SUPPORTED
-      case JDCT_ISLOW:
-	method_ptr = jpeg_idct_islow;
-	method = JDCT_ISLOW;
-	break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-      case JDCT_IFAST:
-	method_ptr = jpeg_idct_ifast;
-	method = JDCT_IFAST;
-	break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-      case JDCT_FLOAT:
-	method_ptr = jpeg_idct_float;
-	method = JDCT_FLOAT;
-	break;
-#endif
-      default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-	break;
-      }
-      break;
-    default:
-      ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
-	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
-      break;
-    }
-    idct->pub.inverse_DCT[ci] = method_ptr;
-    /* Create multiplier table from quant table.
-     * However, we can skip this if the component is uninteresting
-     * or if we already built the table.  Also, if no quant table
-     * has yet been saved for the component, we leave the
-     * multiplier table all-zero; we'll be reading zeroes from the
-     * coefficient controller's buffer anyway.
-     */
-    if (! compptr->component_needed || idct->cur_method[ci] == method)
-      continue;
-    qtbl = compptr->quant_table;
-    if (qtbl == NULL)		/* happens if no data yet for component */
-      continue;
-    idct->cur_method[ci] = method;
-    switch (method) {
-#ifdef PROVIDE_ISLOW_TABLES
-    case JDCT_ISLOW:
-      {
-	/* For LL&M IDCT method, multipliers are equal to raw quantization
-	 * coefficients, but are stored as ints to ensure access efficiency.
-	 */
-	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
-	}
-      }
-      break;
-#endif
-#ifdef DCT_IFAST_SUPPORTED
-    case JDCT_IFAST:
-      {
-	/* For AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * For integer operation, the multiplier table is to be scaled by
-	 * IFAST_SCALE_BITS.
-	 */
-	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
-#define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
-
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ifmtbl[i] = (IFAST_MULT_TYPE)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-IFAST_SCALE_BITS);
-	}
-      }
-      break;
-#endif
-#ifdef DCT_FLOAT_SUPPORTED
-    case JDCT_FLOAT:
-      {
-	/* For float AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 1/8.
-	 */
-	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
-
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fmtbl[i] = (FLOAT_MULT_TYPE)
-	      ((double) qtbl->quantval[i] *
-	       aanscalefactor[row] * aanscalefactor[col] * 0.125);
-	    i++;
-	  }
-	}
-      }
-      break;
-#endif
-    default:
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-      break;
-    }
-  }
-}
-
-
-/*
- * Initialize IDCT manager.
- */
-
-GLOBAL(void)
-jinit_inverse_dct (j_decompress_ptr cinfo)
-{
-  my_idct_ptr idct;
-  int ci;
-  jpeg_component_info *compptr;
-
-  idct = (my_idct_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_idct_controller));
-  cinfo->idct = &idct->pub;
-  idct->pub.start_pass = start_pass;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Allocate and pre-zero a multiplier table for each component */
-    compptr->dct_table =
-      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(multiplier_table));
-    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
-    /* Mark multiplier table not yet set up for any method */
-    idct->cur_method[ci] = -1;
-  }
-}
diff --git a/cmake/externals/libjpeg/jdhuff.c b/cmake/externals/libjpeg/jdhuff.c
deleted file mode 100644
index f175f0c32..000000000
--- a/cmake/externals/libjpeg/jdhuff.c
+++ /dev/null
@@ -1,1559 +0,0 @@
-/*
- * jdhuff.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains Huffman entropy decoding routines.
- * Both sequential and progressive modes are supported in this single module.
- *
- * Much of the complexity here has to do with supporting input suspension.
- * If the data source module demands suspension, we want to be able to back
- * up to the start of the current MCU.  To do this, we copy state variables
- * into local working storage, and update them back to the permanent
- * storage only upon successful completion of an MCU.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Derived data constructed for each Huffman table */
-
-#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
-
-typedef struct {
-  /* Basic tables: (element [0] of each array is unused) */
-  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
-  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
-  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
-  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
-   * the smallest code of length k; so given a code of length k, the
-   * corresponding symbol is huffval[code + valoffset[k]]
-   */
-
-  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
-  JHUFF_TBL *pub;
-
-  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
-   * the input data stream.  If the next Huffman code is no more
-   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
-   * the corresponding symbol directly from these tables.
-   */
-  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
-  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
-} d_derived_tbl;
-
-
-/*
- * Fetching the next N bits from the input stream is a time-critical operation
- * for the Huffman decoders.  We implement it with a combination of inline
- * macros and out-of-line subroutines.  Note that N (the number of bits
- * demanded at one time) never exceeds 15 for JPEG use.
- *
- * We read source bytes into get_buffer and dole out bits as needed.
- * If get_buffer already contains enough bits, they are fetched in-line
- * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
- * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
- * as full as possible (not just to the number of bits needed; this
- * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
- * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
- * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
- * at least the requested number of bits --- dummy zeroes are inserted if
- * necessary.
- */
-
-typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
-#define BIT_BUF_SIZE  32	/* size of buffer in bits */
-
-/* If long is > 32 bits on your machine, and shifting/masking longs is
- * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
- * appropriately should be a win.  Unfortunately we can't define the size
- * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
- * because not all machines measure sizeof in 8-bit bytes.
- */
-
-typedef struct {		/* Bitreading state saved across MCUs */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
-} bitread_perm_state;
-
-typedef struct {		/* Bitreading working state within an MCU */
-  /* Current data source location */
-  /* We need a copy, rather than munging the original, in case of suspension */
-  const JOCTET * next_input_byte; /* => next byte to read from source */
-  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
-  /* Bit input buffer --- note these values are kept in register variables,
-   * not in this struct, inside the inner loops.
-   */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
-  /* Pointer needed by jpeg_fill_bit_buffer. */
-  j_decompress_ptr cinfo;	/* back link to decompress master record */
-} bitread_working_state;
-
-/* Macros to declare and load/save bitread local variables. */
-#define BITREAD_STATE_VARS  \
-	register bit_buf_type get_buffer;  \
-	register int bits_left;  \
-	bitread_working_state br_state
-
-#define BITREAD_LOAD_STATE(cinfop,permstate)  \
-	br_state.cinfo = cinfop; \
-	br_state.next_input_byte = cinfop->src->next_input_byte; \
-	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
-	get_buffer = permstate.get_buffer; \
-	bits_left = permstate.bits_left;
-
-#define BITREAD_SAVE_STATE(cinfop,permstate)  \
-	cinfop->src->next_input_byte = br_state.next_input_byte; \
-	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
-	permstate.get_buffer = get_buffer; \
-	permstate.bits_left = bits_left
-
-/*
- * These macros provide the in-line portion of bit fetching.
- * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
- * before using GET_BITS, PEEK_BITS, or DROP_BITS.
- * The variables get_buffer and bits_left are assumed to be locals,
- * but the state struct might not be (jpeg_huff_decode needs this).
- *	CHECK_BIT_BUFFER(state,n,action);
- *		Ensure there are N bits in get_buffer; if suspend, take action.
- *      val = GET_BITS(n);
- *		Fetch next N bits.
- *      val = PEEK_BITS(n);
- *		Fetch next N bits without removing them from the buffer.
- *	DROP_BITS(n);
- *		Discard next N bits.
- * The value N should be a simple variable, not an expression, because it
- * is evaluated multiple times.
- */
-
-#define CHECK_BIT_BUFFER(state,nbits,action) \
-	{ if (bits_left < (nbits)) {  \
-	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
-	      { action; }  \
-	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
-
-#define GET_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -= (nbits)))) & BIT_MASK(nbits))
-
-#define PEEK_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -  (nbits)))) & BIT_MASK(nbits))
-
-#define DROP_BITS(nbits) \
-	(bits_left -= (nbits))
-
-
-/*
- * Code for extracting next Huffman-coded symbol from input bit stream.
- * Again, this is time-critical and we make the main paths be macros.
- *
- * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
- * without looping.  Usually, more than 95% of the Huffman codes will be 8
- * or fewer bits long.  The few overlength codes are handled with a loop,
- * which need not be inline code.
- *
- * Notes about the HUFF_DECODE macro:
- * 1. Near the end of the data segment, we may fail to get enough bits
- *    for a lookahead.  In that case, we do it the hard way.
- * 2. If the lookahead table contains no entry, the next code must be
- *    more than HUFF_LOOKAHEAD bits long.
- * 3. jpeg_huff_decode returns -1 if forced to suspend.
- */
-
-#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
-{ register int nb, look; \
-  if (bits_left < HUFF_LOOKAHEAD) { \
-    if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-    if (bits_left < HUFF_LOOKAHEAD) { \
-      nb = 1; goto slowlabel; \
-    } \
-  } \
-  look = PEEK_BITS(HUFF_LOOKAHEAD); \
-  if ((nb = htbl->look_nbits[look]) != 0) { \
-    DROP_BITS(nb); \
-    result = htbl->look_sym[look]; \
-  } else { \
-    nb = HUFF_LOOKAHEAD+1; \
-slowlabel: \
-    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
-	{ failaction; } \
-    get_buffer = state.get_buffer; bits_left = state.bits_left; \
-  } \
-}
-
-
-/*
- * Expanded entropy decoder object for Huffman decoding.
- *
- * The savable_state subrecord contains fields that change within an MCU,
- * but must not be updated permanently until we complete the MCU.
- */
-
-typedef struct {
-  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
-  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
-} savable_state;
-
-/* This macro is to work around compilers with missing or broken
- * structure assignment.  You'll need to fix this code if you have
- * such a compiler and you change MAX_COMPS_IN_SCAN.
- */
-
-#ifndef NO_STRUCT_ASSIGN
-#define ASSIGN_STATE(dest,src)  ((dest) = (src))
-#else
-#if MAX_COMPS_IN_SCAN == 4
-#define ASSIGN_STATE(dest,src)  \
-	((dest).EOBRUN = (src).EOBRUN, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
-#endif
-#endif
-
-
-typedef struct {
-  struct jpeg_entropy_decoder pub; /* public fields */
-
-  /* These fields are loaded into local variables at start of each MCU.
-   * In case of suspension, we exit WITHOUT updating them.
-   */
-  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
-  savable_state saved;		/* Other state at start of MCU */
-
-  /* These fields are NOT loaded into local working state. */
-  boolean insufficient_data;	/* set TRUE after emitting warning */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-
-  /* Following two fields used only in progressive mode */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
-
-  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
-
-  /* Following fields used only in sequential mode */
-
-  /* Pointers to derived tables (these workspaces have image lifespan) */
-  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
-  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
-
-  /* Precalculated info set up by start_pass for use in decode_mcu: */
-
-  /* Pointers to derived tables to be used for each block within an MCU */
-  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
-  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
-  /* Whether we care about the DC and AC coefficient values for each block */
-  int coef_limit[D_MAX_BLOCKS_IN_MCU];
-} huff_entropy_decoder;
-
-typedef huff_entropy_decoder * huff_entropy_ptr;
-
-
-static const int jpeg_zigzag_order[8][8] = {
-  {  0,  1,  5,  6, 14, 15, 27, 28 },
-  {  2,  4,  7, 13, 16, 26, 29, 42 },
-  {  3,  8, 12, 17, 25, 30, 41, 43 },
-  {  9, 11, 18, 24, 31, 40, 44, 53 },
-  { 10, 19, 23, 32, 39, 45, 52, 54 },
-  { 20, 22, 33, 38, 46, 51, 55, 60 },
-  { 21, 34, 37, 47, 50, 56, 59, 61 },
-  { 35, 36, 48, 49, 57, 58, 62, 63 }
-};
-
-static const int jpeg_zigzag_order7[7][7] = {
-  {  0,  1,  5,  6, 14, 15, 27 },
-  {  2,  4,  7, 13, 16, 26, 28 },
-  {  3,  8, 12, 17, 25, 29, 38 },
-  {  9, 11, 18, 24, 30, 37, 39 },
-  { 10, 19, 23, 31, 36, 40, 45 },
-  { 20, 22, 32, 35, 41, 44, 46 },
-  { 21, 33, 34, 42, 43, 47, 48 }
-};
-
-static const int jpeg_zigzag_order6[6][6] = {
-  {  0,  1,  5,  6, 14, 15 },
-  {  2,  4,  7, 13, 16, 25 },
-  {  3,  8, 12, 17, 24, 26 },
-  {  9, 11, 18, 23, 27, 32 },
-  { 10, 19, 22, 28, 31, 33 },
-  { 20, 21, 29, 30, 34, 35 }
-};
-
-static const int jpeg_zigzag_order5[5][5] = {
-  {  0,  1,  5,  6, 14 },
-  {  2,  4,  7, 13, 15 },
-  {  3,  8, 12, 16, 21 },
-  {  9, 11, 17, 20, 22 },
-  { 10, 18, 19, 23, 24 }
-};
-
-static const int jpeg_zigzag_order4[4][4] = {
-  { 0,  1,  5,  6 },
-  { 2,  4,  7, 12 },
-  { 3,  8, 11, 13 },
-  { 9, 10, 14, 15 }
-};
-
-static const int jpeg_zigzag_order3[3][3] = {
-  { 0, 1, 5 },
-  { 2, 4, 6 },
-  { 3, 7, 8 }
-};
-
-static const int jpeg_zigzag_order2[2][2] = {
-  { 0, 1 },
-  { 2, 3 }
-};
-
-
-/*
- * Compute the derived values for a Huffman table.
- * This routine also performs some validation checks on the table.
- */
-
-LOCAL(void)
-jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
-			 d_derived_tbl ** pdtbl)
-{
-  JHUFF_TBL *htbl;
-  d_derived_tbl *dtbl;
-  int p, i, l, si, numsymbols;
-  int lookbits, ctr;
-  char huffsize[257];
-  unsigned int huffcode[257];
-  unsigned int code;
-
-  /* Note that huffsize[] and huffcode[] are filled in code-length order,
-   * paralleling the order of the symbols themselves in htbl->huffval[].
-   */
-
-  /* Find the input Huffman table */
-  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
-    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
-  htbl =
-    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
-  if (htbl == NULL)
-    htbl = jpeg_std_huff_table((j_common_ptr) cinfo, isDC, tblno);
-
-  /* Allocate a workspace if we haven't already done so. */
-  if (*pdtbl == NULL)
-    *pdtbl = (d_derived_tbl *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(d_derived_tbl));
-  dtbl = *pdtbl;
-  dtbl->pub = htbl;		/* fill in back link */
-  
-  /* Figure C.1: make table of Huffman code length for each symbol */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    while (i--)
-      huffsize[p++] = (char) l;
-  }
-  huffsize[p] = 0;
-  numsymbols = p;
-  
-  /* Figure C.2: generate the codes themselves */
-  /* We also validate that the counts represent a legal Huffman code tree. */
-  
-  code = 0;
-  si = huffsize[0];
-  p = 0;
-  while (huffsize[p]) {
-    while (((int) huffsize[p]) == si) {
-      huffcode[p++] = code;
-      code++;
-    }
-    /* code is now 1 more than the last code used for codelength si; but
-     * it must still fit in si bits, since no code is allowed to be all ones.
-     */
-    if (((INT32) code) >= (((INT32) 1) << si))
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    code <<= 1;
-    si++;
-  }
-
-  /* Figure F.15: generate decoding tables for bit-sequential decoding */
-
-  p = 0;
-  for (l = 1; l <= 16; l++) {
-    if (htbl->bits[l]) {
-      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
-       * minus the minimum code of length l
-       */
-      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
-      p += htbl->bits[l];
-      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
-    } else {
-      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
-    }
-  }
-  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
-
-  /* Compute lookahead tables to speed up decoding.
-   * First we set all the table entries to 0, indicating "too long";
-   * then we iterate through the Huffman codes that are short enough and
-   * fill in all the entries that correspond to bit sequences starting
-   * with that code.
-   */
-
-  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
-
-  p = 0;
-  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
-    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
-      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
-      /* Generate left-justified code followed by all possible bit sequences */
-      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
-      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-	dtbl->look_nbits[lookbits] = l;
-	dtbl->look_sym[lookbits] = htbl->huffval[p];
-	lookbits++;
-      }
-    }
-  }
-
-  /* Validate symbols as being reasonable.
-   * For AC tables, we make no check, but accept all byte values 0..255.
-   * For DC tables, we require the symbols to be in range 0..15.
-   * (Tighter bounds could be applied depending on the data depth and mode,
-   * but this is sufficient to ensure safe decoding.)
-   */
-  if (isDC) {
-    for (i = 0; i < numsymbols; i++) {
-      int sym = htbl->huffval[i];
-      if (sym < 0 || sym > 15)
-	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-    }
-  }
-}
-
-
-/*
- * Out-of-line code for bit fetching.
- * Note: current values of get_buffer and bits_left are passed as parameters,
- * but are returned in the corresponding fields of the state struct.
- *
- * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
- * of get_buffer to be used.  (On machines with wider words, an even larger
- * buffer could be used.)  However, on some machines 32-bit shifts are
- * quite slow and take time proportional to the number of places shifted.
- * (This is true with most PC compilers, for instance.)  In this case it may
- * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
- * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
- */
-
-#ifdef SLOW_SHIFT_32
-#define MIN_GET_BITS  15	/* minimum allowable value */
-#else
-#define MIN_GET_BITS  (BIT_BUF_SIZE-7)
-#endif
-
-
-LOCAL(boolean)
-jpeg_fill_bit_buffer (bitread_working_state * state,
-		      register bit_buf_type get_buffer, register int bits_left,
-		      int nbits)
-/* Load up the bit buffer to a depth of at least nbits */
-{
-  /* Copy heavily used state fields into locals (hopefully registers) */
-  register const JOCTET * next_input_byte = state->next_input_byte;
-  register size_t bytes_in_buffer = state->bytes_in_buffer;
-  j_decompress_ptr cinfo = state->cinfo;
-
-  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
-  /* (It is assumed that no request will be for more than that many bits.) */
-  /* We fail to do so only if we hit a marker or are forced to suspend. */
-
-  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
-    while (bits_left < MIN_GET_BITS) {
-      register int c;
-
-      /* Attempt to read a byte */
-      if (bytes_in_buffer == 0) {
-	if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	  return FALSE;
-	next_input_byte = cinfo->src->next_input_byte;
-	bytes_in_buffer = cinfo->src->bytes_in_buffer;
-      }
-      bytes_in_buffer--;
-      c = GETJOCTET(*next_input_byte++);
-
-      /* If it's 0xFF, check and discard stuffed zero byte */
-      if (c == 0xFF) {
-	/* Loop here to discard any padding FF's on terminating marker,
-	 * so that we can save a valid unread_marker value.  NOTE: we will
-	 * accept multiple FF's followed by a 0 as meaning a single FF data
-	 * byte.  This data pattern is not valid according to the standard.
-	 */
-	do {
-	  if (bytes_in_buffer == 0) {
-	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	      return FALSE;
-	    next_input_byte = cinfo->src->next_input_byte;
-	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
-	  }
-	  bytes_in_buffer--;
-	  c = GETJOCTET(*next_input_byte++);
-	} while (c == 0xFF);
-
-	if (c == 0) {
-	  /* Found FF/00, which represents an FF data byte */
-	  c = 0xFF;
-	} else {
-	  /* Oops, it's actually a marker indicating end of compressed data.
-	   * Save the marker code for later use.
-	   * Fine point: it might appear that we should save the marker into
-	   * bitread working state, not straight into permanent state.  But
-	   * once we have hit a marker, we cannot need to suspend within the
-	   * current MCU, because we will read no more bytes from the data
-	   * source.  So it is OK to update permanent state right away.
-	   */
-	  cinfo->unread_marker = c;
-	  /* See if we need to insert some fake zero bits. */
-	  goto no_more_bytes;
-	}
-      }
-
-      /* OK, load c into get_buffer */
-      get_buffer = (get_buffer << 8) | c;
-      bits_left += 8;
-    } /* end while */
-  } else {
-  no_more_bytes:
-    /* We get here if we've read the marker that terminates the compressed
-     * data segment.  There should be enough bits in the buffer register
-     * to satisfy the request; if so, no problem.
-     */
-    if (nbits > bits_left) {
-      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
-       * the data stream, so that we can produce some kind of image.
-       * We use a nonvolatile flag to ensure that only one warning message
-       * appears per data segment.
-       */
-      if (! ((huff_entropy_ptr) cinfo->entropy)->insufficient_data) {
-	WARNMS(cinfo, JWRN_HIT_MARKER);
-	((huff_entropy_ptr) cinfo->entropy)->insufficient_data = TRUE;
-      }
-      /* Fill the buffer with zero bits */
-      get_buffer <<= MIN_GET_BITS - bits_left;
-      bits_left = MIN_GET_BITS;
-    }
-  }
-
-  /* Unload the local registers */
-  state->next_input_byte = next_input_byte;
-  state->bytes_in_buffer = bytes_in_buffer;
-  state->get_buffer = get_buffer;
-  state->bits_left = bits_left;
-
-  return TRUE;
-}
-
-
-/*
- * Figure F.12: extend sign bit.
- * On some machines, a shift and sub will be faster than a table lookup.
- */
-
-#ifdef AVOID_TABLES
-
-#define BIT_MASK(nbits)   ((1<<(nbits))-1)
-#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x))
-
-#else
-
-#define BIT_MASK(nbits)   bmask[nbits]
-#define HUFF_EXTEND(x,s)  ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x))
-
-static const int bmask[16] =	/* bmask[n] is mask for n rightmost bits */
-  { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
-    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
-
-#endif /* AVOID_TABLES */
-
-
-/*
- * Out-of-line code for Huffman code decoding.
- */
-
-LOCAL(int)
-jpeg_huff_decode (bitread_working_state * state,
-		  register bit_buf_type get_buffer, register int bits_left,
-		  d_derived_tbl * htbl, int min_bits)
-{
-  register int l = min_bits;
-  register INT32 code;
-
-  /* HUFF_DECODE has determined that the code is at least min_bits */
-  /* bits long, so fetch that many bits in one swoop. */
-
-  CHECK_BIT_BUFFER(*state, l, return -1);
-  code = GET_BITS(l);
-
-  /* Collect the rest of the Huffman code one bit at a time. */
-  /* This is per Figure F.16 in the JPEG spec. */
-
-  while (code > htbl->maxcode[l]) {
-    code <<= 1;
-    CHECK_BIT_BUFFER(*state, 1, return -1);
-    code |= GET_BITS(1);
-    l++;
-  }
-
-  /* Unload the local registers */
-  state->get_buffer = get_buffer;
-  state->bits_left = bits_left;
-
-  /* With garbage input we may reach the sentinel value l = 17. */
-
-  if (l > 16) {
-    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
-    return 0;			/* fake a zero as the safest result */
-  }
-
-  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
-}
-
-
-/*
- * Finish up at the end of a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-finish_pass_huff (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-
-  /* Throw away any unused bits remaining in bit buffer; */
-  /* include any full bytes in next_marker's count of discarded bytes */
-  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
-  entropy->bitstate.bits_left = 0;
-}
-
-
-/*
- * Check for a restart marker & resynchronize decoder.
- * Returns FALSE if must suspend.
- */
-
-LOCAL(boolean)
-process_restart (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci;
-
-  finish_pass_huff(cinfo);
-
-  /* Advance past the RSTn marker */
-  if (! (*cinfo->marker->read_restart_marker) (cinfo))
-    return FALSE;
-
-  /* Re-initialize DC predictions to 0 */
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-    entropy->saved.last_dc_val[ci] = 0;
-  /* Re-init EOB run count, too */
-  entropy->saved.EOBRUN = 0;
-
-  /* Reset restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-
-  /* Reset out-of-data flag, unless read_restart_marker left us smack up
-   * against a marker.  In that case we will end up treating the next data
-   * segment as empty, and we can avoid producing bogus output pixels by
-   * leaving the flag set.
-   */
-  if (cinfo->unread_marker == 0)
-    entropy->insufficient_data = FALSE;
-
-  return TRUE;
-}
-
-
-/*
- * Huffman MCU decoding.
- * Each of these routines decodes and returns one MCU's worth of
- * Huffman-compressed coefficients. 
- * The coefficients are reordered from zigzag order into natural array order,
- * but are not dequantized.
- *
- * The i'th block of the MCU is stored into the block pointed to by
- * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
- * (Wholesale zeroing is usually a little faster than retail...)
- *
- * We return FALSE if data source requested suspension.  In that case no
- * changes have been made to permanent state.  (Exception: some output
- * coefficients may already have been assigned.  This is harmless for
- * spectral selection, since we'll just re-assign them on the next call.
- * Successive approximation AC refinement has to be more careful, however.)
- */
-
-/*
- * MCU decoding for DC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int Al = cinfo->Al;
-  register int s, r;
-  int blkn, ci;
-  JBLOCKROW block;
-  BITREAD_STATE_VARS;
-  savable_state state;
-  d_derived_tbl * tbl;
-  jpeg_component_info * compptr;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      block = MCU_data[blkn];
-      ci = cinfo->MCU_membership[blkn];
-      compptr = cinfo->cur_comp_info[ci];
-      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
-      if (s) {
-	CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	r = GET_BITS(s);
-	s = HUFF_EXTEND(r, s);
-      }
-
-      /* Convert DC difference to actual value, update last_dc_val */
-      s += state.last_dc_val[ci];
-      state.last_dc_val[ci] = s;
-      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */
-      (*block)[0] = (JCOEF) (s << Al);
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC initial scan (either spectral selection,
- * or first pass of successive approximation).
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int s, k, r;
-  unsigned int EOBRUN;
-  int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
-  BITREAD_STATE_VARS;
-  d_derived_tbl * tbl;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state.
-     * We can avoid loading/saving bitread state if in an EOB run.
-     */
-    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
-
-    /* There is always only one block per MCU */
-
-    if (EOBRUN)			/* if it's a band of zeroes... */
-      EOBRUN--;			/* ...process it now (we do nothing) */
-    else {
-      BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-      Se = cinfo->Se;
-      Al = cinfo->Al;
-      natural_order = cinfo->natural_order;
-      block = MCU_data[0];
-      tbl = entropy->ac_derived_tbl;
-
-      for (k = cinfo->Ss; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	  /* Scale and output coefficient in natural (dezigzagged) order */
-	  (*block)[natural_order[k]] = (JCOEF) (s << Al);
-	} else {
-	  if (r != 15) {	/* EOBr, run length is 2^r + appended bits */
-	    if (r) {		/* EOBr, r > 0 */
-	      EOBRUN = 1 << r;
-	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	      EOBRUN--;		/* this band is processed at this moment */
-	    }
-	    break;		/* force end-of-band */
-	  }
-	  k += 15;		/* ZRL: skip 15 zeroes in band */
-	}
-      }
-
-      BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    }
-
-    /* Completed MCU, so update state */
-    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component,
- * although the spec is not very clear on the point.
- */
-
-METHODDEF(boolean)
-decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  JCOEF p1;
-  int blkn;
-  BITREAD_STATE_VARS;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* Not worth the cycles to check insufficient_data here,
-   * since we will not change the data anyway if we read zeroes.
-   */
-
-  /* Load up working state */
-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-
-  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
-
-  /* Outer loop handles each block in the MCU */
-
-  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    /* Encoded data is simply the next bit of the two's-complement DC value */
-    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
-    if (GET_BITS(1))
-      MCU_data[blkn][0][0] |= p1;
-    /* Note: since we use |=, repeating the assignment later is safe */
-  }
-
-  /* Completed MCU, so update state */
-  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * MCU decoding for AC successive approximation refinement scan.
- */
-
-METHODDEF(boolean)
-decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int s, k, r;
-  unsigned int EOBRUN;
-  int Se;
-  JCOEF p1, m1;
-  const int * natural_order;
-  JBLOCKROW block;
-  JCOEFPTR thiscoef;
-  BITREAD_STATE_VARS;
-  d_derived_tbl * tbl;
-  int num_newnz;
-  int newnz_pos[DCTSIZE2];
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, don't modify the MCU.
-   */
-  if (! entropy->insufficient_data) {
-
-    Se = cinfo->Se;
-    p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-    m1 = -p1;			/* -1 in the bit position being coded */
-    natural_order = cinfo->natural_order;
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
-
-    /* There is always only one block per MCU */
-    block = MCU_data[0];
-    tbl = entropy->ac_derived_tbl;
-
-    /* If we are forced to suspend, we must undo the assignments to any newly
-     * nonzero coefficients in the block, because otherwise we'd get confused
-     * next time about which coefficients were already nonzero.
-     * But we need not undo addition of bits to already-nonzero coefficients;
-     * instead, we can test the current bit to see if we already did it.
-     */
-    num_newnz = 0;
-
-    /* initialize coefficient loop counter to start of band */
-    k = cinfo->Ss;
-
-    if (EOBRUN == 0) {
-      do {
-	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  if (s != 1)		/* size of new coef should always be 1 */
-	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1))
-	    s = p1;		/* newly nonzero coef is positive */
-	  else
-	    s = m1;		/* newly nonzero coef is negative */
-	} else {
-	  if (r != 15) {
-	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
-	    if (r) {
-	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    break;		/* rest of block is handled by EOB logic */
-	  }
-	  /* note s = 0 for processing ZRL */
-	}
-	/* Advance over already-nonzero coefs and r still-zero coefs,
-	 * appending correction bits to the nonzeroes.  A correction bit is 1
-	 * if the absolute value of the coefficient must be increased.
-	 */
-	do {
-	  thiscoef = *block + natural_order[k];
-	  if (*thiscoef) {
-	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	    if (GET_BITS(1)) {
-	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
-		if (*thiscoef >= 0)
-		  *thiscoef += p1;
-		else
-		  *thiscoef += m1;
-	      }
-	    }
-	  } else {
-	    if (--r < 0)
-	      break;		/* reached target zero coefficient */
-	  }
-	  k++;
-	} while (k <= Se);
-	if (s) {
-	  int pos = natural_order[k];
-	  /* Output newly nonzero coefficient */
-	  (*block)[pos] = (JCOEF) s;
-	  /* Remember its position in case we have to suspend */
-	  newnz_pos[num_newnz++] = pos;
-	}
-	k++;
-      } while (k <= Se);
-    }
-
-    if (EOBRUN) {
-      /* Scan any remaining coefficient positions after the end-of-band
-       * (the last newly nonzero coefficient, if any).  Append a correction
-       * bit to each already-nonzero coefficient.  A correction bit is 1
-       * if the absolute value of the coefficient must be increased.
-       */
-      do {
-	thiscoef = *block + natural_order[k];
-	if (*thiscoef) {
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1)) {
-	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
-	      if (*thiscoef >= 0)
-		*thiscoef += p1;
-	      else
-		*thiscoef += m1;
-	    }
-	  }
-	}
-	k++;
-      } while (k <= Se);
-      /* Count one block completed in EOB run */
-      EOBRUN--;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-
-undoit:
-  /* Re-zero any output coefficients that we made newly nonzero */
-  while (num_newnz)
-    (*block)[newnz_pos[--num_newnz]] = 0;
-
-  return FALSE;
-}
-
-
-/*
- * Decode one MCU's worth of Huffman-compressed coefficients,
- * partial blocks.
- */
-
-METHODDEF(boolean)
-decode_mcu_sub (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  const int * natural_order;
-  int Se, blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    natural_order = cinfo->natural_order;
-    Se = cinfo->lim_Se;
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * htbl;
-      register int s, k, r;
-      int coef_limit, ci;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      htbl = entropy->dc_cur_tbls[blkn];
-      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
-
-      htbl = entropy->ac_cur_tbls[blkn];
-      k = 1;
-      coef_limit = entropy->coef_limit[blkn];
-      if (coef_limit) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	}
-	ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient */
-	(*block)[0] = (JCOEF) s;
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (; k < coef_limit; k++) {
-	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
-
-	  r = s >> 4;
-	  s &= 15;
-
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in natural_order[] will save us
-	     * if k > Se, which could happen if the data is corrupted.
-	     */
-	    (*block)[natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      goto EndOfBlock;
-	    k += 15;
-	  }
-	}
-      } else {
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	}
-      }
-
-      /* Section F.2.2.2: decode the AC coefficients */
-      /* In this path we just discard the values */
-      for (; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
-
-	r = s >> 4;
-	s &= 15;
-
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	} else {
-	  if (r != 15)
-	    break;
-	  k += 15;
-	}
-      }
-
-      EndOfBlock: ;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * Decode one MCU's worth of Huffman-compressed coefficients,
- * full-size blocks.
- */
-
-METHODDEF(boolean)
-decode_mcu (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
-
-  /* Process restart marker if needed; may have to suspend */
-  if (cinfo->restart_interval) {
-    if (entropy->restarts_to_go == 0)
-      if (! process_restart(cinfo))
-	return FALSE;
-  }
-
-  /* If we've run out of data, just leave the MCU set to zeroes.
-   * This way, we return uniform gray for the remainder of the segment.
-   */
-  if (! entropy->insufficient_data) {
-
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * htbl;
-      register int s, k, r;
-      int coef_limit, ci;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      htbl = entropy->dc_cur_tbls[blkn];
-      HUFF_DECODE(s, br_state, htbl, return FALSE, label1);
-
-      htbl = entropy->ac_cur_tbls[blkn];
-      k = 1;
-      coef_limit = entropy->coef_limit[blkn];
-      if (coef_limit) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	}
-	ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient */
-	(*block)[0] = (JCOEF) s;
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (; k < coef_limit; k++) {
-	  HUFF_DECODE(s, br_state, htbl, return FALSE, label2);
-
-	  r = s >> 4;
-	  s &= 15;
-
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in jpeg_natural_order[] will save us
-	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
-	     */
-	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      goto EndOfBlock;
-	    k += 15;
-	  }
-	}
-      } else {
-	if (s) {
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	}
-      }
-
-      /* Section F.2.2.2: decode the AC coefficients */
-      /* In this path we just discard the values */
-      for (; k < DCTSIZE2; k++) {
-	HUFF_DECODE(s, br_state, htbl, return FALSE, label3);
-
-	r = s >> 4;
-	s &= 15;
-
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  DROP_BITS(s);
-	} else {
-	  if (r != 15)
-	    break;
-	  k += 15;
-	}
-      }
-
-      EndOfBlock: ;
-    }
-
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
-  }
-
-  /* Account for restart interval if using restarts */
-  if (cinfo->restart_interval)
-    entropy->restarts_to_go--;
-
-  return TRUE;
-}
-
-
-/*
- * Initialize for a Huffman-compressed scan.
- */
-
-METHODDEF(void)
-start_pass_huff_decoder (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int ci, blkn, tbl, i;
-  jpeg_component_info * compptr;
-
-  if (cinfo->progressive_mode) {
-    /* Validate progressive scan parameters */
-    if (cinfo->Ss == 0) {
-      if (cinfo->Se != 0)
-	goto bad;
-    } else {
-      /* need not check Ss/Se < 0 since they came from unsigned bytes */
-      if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se)
-	goto bad;
-      /* AC scans may have only one component */
-      if (cinfo->comps_in_scan != 1)
-	goto bad;
-    }
-    if (cinfo->Ah != 0) {
-      /* Successive approximation refinement scan: must have Al = Ah-1. */
-      if (cinfo->Ah-1 != cinfo->Al)
-	goto bad;
-    }
-    if (cinfo->Al > 13) {	/* need not check for < 0 */
-      /* Arguably the maximum Al value should be less than 13 for 8-bit
-       * precision, but the spec doesn't say so, and we try to be liberal
-       * about what we accept.  Note: large Al values could result in
-       * out-of-range DC coefficients during early scans, leading to bizarre
-       * displays due to overflows in the IDCT math.  But we won't crash.
-       */
-      bad:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-    /* Update progression status, and verify that scan order is legal.
-     * Note that inter-scan inconsistencies are treated as warnings
-     * not fatal errors ... not clear if this is right way to behave.
-     */
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
-      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
-      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
-      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
-	int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
-	if (cinfo->Ah != expected)
-	  WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
-	coef_bit_ptr[coefi] = cinfo->Al;
-      }
-    }
-
-    /* Select MCU decoding routine */
-    if (cinfo->Ah == 0) {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_first;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_first;
-    } else {
-      if (cinfo->Ss == 0)
-	entropy->pub.decode_mcu = decode_mcu_DC_refine;
-      else
-	entropy->pub.decode_mcu = decode_mcu_AC_refine;
-    }
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Make sure requested tables are present, and compute derived tables.
-       * We may build same derived table more than once, but it's not expensive.
-       */
-      if (cinfo->Ss == 0) {
-	if (cinfo->Ah == 0) {	/* DC refinement needs no table */
-	  tbl = compptr->dc_tbl_no;
-	  jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-				  & entropy->derived_tbls[tbl]);
-	}
-      } else {
-	tbl = compptr->ac_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->derived_tbls[tbl]);
-	/* remember the single active table */
-	entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-
-    /* Initialize private state variables */
-    entropy->saved.EOBRUN = 0;
-  } else {
-    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
-     * This ought to be an error condition, but we make it a warning because
-     * there are some baseline files out there with all zeroes in these bytes.
-     */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-	((cinfo->is_baseline || cinfo->Se < DCTSIZE2) &&
-	cinfo->Se != cinfo->lim_Se))
-      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
-
-    /* Select MCU decoding routine */
-    /* We retain the hard-coded case for full-size blocks.
-     * This is not necessary, but it appears that this version is slightly
-     * more performant in the given implementation.
-     * With an improved implementation we would prefer a single optimized
-     * function.
-     */
-    if (cinfo->lim_Se != DCTSIZE2-1)
-      entropy->pub.decode_mcu = decode_mcu_sub;
-    else
-      entropy->pub.decode_mcu = decode_mcu;
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Compute derived values for Huffman tables */
-      /* We may do this more than once for a table, but it's not expensive */
-      tbl = compptr->dc_tbl_no;
-      jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-			      & entropy->dc_derived_tbls[tbl]);
-      if (cinfo->lim_Se) {	/* AC needs no table when not present */
-	tbl = compptr->ac_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-				& entropy->ac_derived_tbls[tbl]);
-      }
-      /* Initialize DC predictions to 0 */
-      entropy->saved.last_dc_val[ci] = 0;
-    }
-
-    /* Precalculate decoding info for each block in an MCU of this scan */
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      ci = cinfo->MCU_membership[blkn];
-      compptr = cinfo->cur_comp_info[ci];
-      /* Precalculate which table to use for each block */
-      entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
-      entropy->ac_cur_tbls[blkn] =	/* AC needs no table when not present */
-	cinfo->lim_Se ? entropy->ac_derived_tbls[compptr->ac_tbl_no] : NULL;
-      /* Decide whether we really care about the coefficient values */
-      if (compptr->component_needed) {
-	ci = compptr->DCT_v_scaled_size;
-	i = compptr->DCT_h_scaled_size;
-	switch (cinfo->lim_Se) {
-	case (1*1-1):
-	  entropy->coef_limit[blkn] = 1;
-	  break;
-	case (2*2-1):
-	  if (ci <= 0 || ci > 2) ci = 2;
-	  if (i <= 0 || i > 2) i = 2;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order2[ci - 1][i - 1];
-	  break;
-	case (3*3-1):
-	  if (ci <= 0 || ci > 3) ci = 3;
-	  if (i <= 0 || i > 3) i = 3;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order3[ci - 1][i - 1];
-	  break;
-	case (4*4-1):
-	  if (ci <= 0 || ci > 4) ci = 4;
-	  if (i <= 0 || i > 4) i = 4;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order4[ci - 1][i - 1];
-	  break;
-	case (5*5-1):
-	  if (ci <= 0 || ci > 5) ci = 5;
-	  if (i <= 0 || i > 5) i = 5;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order5[ci - 1][i - 1];
-	  break;
-	case (6*6-1):
-	  if (ci <= 0 || ci > 6) ci = 6;
-	  if (i <= 0 || i > 6) i = 6;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order6[ci - 1][i - 1];
-	  break;
-	case (7*7-1):
-	  if (ci <= 0 || ci > 7) ci = 7;
-	  if (i <= 0 || i > 7) i = 7;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order7[ci - 1][i - 1];
-	  break;
-	default:
-	  if (ci <= 0 || ci > 8) ci = 8;
-	  if (i <= 0 || i > 8) i = 8;
-	  entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[ci - 1][i - 1];
-	}
-      } else {
-	entropy->coef_limit[blkn] = 0;
-      }
-    }
-  }
-
-  /* Initialize bitread state variables */
-  entropy->bitstate.bits_left = 0;
-  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
-  entropy->insufficient_data = FALSE;
-
-  /* Initialize restart counter */
-  entropy->restarts_to_go = cinfo->restart_interval;
-}
-
-
-/*
- * Module initialization routine for Huffman entropy decoding.
- */
-
-GLOBAL(void)
-jinit_huff_decoder (j_decompress_ptr cinfo)
-{
-  huff_entropy_ptr entropy;
-  int i;
-
-  entropy = (huff_entropy_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(huff_entropy_decoder));
-  cinfo->entropy = &entropy->pub;
-  entropy->pub.start_pass = start_pass_huff_decoder;
-  entropy->pub.finish_pass = finish_pass_huff;
-
-  if (cinfo->progressive_mode) {
-    /* Create progression status table */
-    int *coef_bit_ptr, ci;
-    cinfo->coef_bits = (int (*)[DCTSIZE2]) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       cinfo->num_components * DCTSIZE2 * SIZEOF(int));
-    coef_bit_ptr = & cinfo->coef_bits[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++)
-      for (i = 0; i < DCTSIZE2; i++)
-	*coef_bit_ptr++ = -1;
-
-    /* Mark derived tables unallocated */
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      entropy->derived_tbls[i] = NULL;
-    }
-  } else {
-    /* Mark derived tables unallocated */
-    for (i = 0; i < NUM_HUFF_TBLS; i++) {
-      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
-    }
-  }
-}
diff --git a/cmake/externals/libjpeg/jdinput.c b/cmake/externals/libjpeg/jdinput.c
deleted file mode 100644
index 29fbef90b..000000000
--- a/cmake/externals/libjpeg/jdinput.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * jdinput.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains input control logic for the JPEG decompressor.
- * These routines are concerned with controlling the decompressor's input
- * processing (marker reading and coefficient decoding).  The actual input
- * reading is done in jdmarker.c, jdhuff.c, and jdarith.c.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_input_controller pub; /* public fields */
-
-  int inheaders;		/* Nonzero until first SOS is reached */
-} my_input_controller;
-
-typedef my_input_controller * my_inputctl_ptr;
-
-
-/* Forward declarations */
-METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Routines to calculate various quantities related to the size of the image.
- */
-
-
-/*
- * Compute output image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- */
-
-GLOBAL(void)
-jpeg_core_output_dimensions (j_decompress_ptr cinfo)
-/* Do computations that are needed before master selection phase.
- * This function is used for transcoding and full decompression.
- */
-{
-#ifdef IDCT_SCALING_SUPPORTED
-  int ci;
-  jpeg_component_info *compptr;
-
-  /* Compute actual output image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) {
-    /* Provide 1/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 1;
-    cinfo->min_DCT_v_scaled_size = 1;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) {
-    /* Provide 2/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 2;
-    cinfo->min_DCT_v_scaled_size = 2;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 3) {
-    /* Provide 3/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 3L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 3L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 3;
-    cinfo->min_DCT_v_scaled_size = 3;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) {
-    /* Provide 4/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 4;
-    cinfo->min_DCT_v_scaled_size = 4;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 5) {
-    /* Provide 5/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 5L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 5L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 5;
-    cinfo->min_DCT_v_scaled_size = 5;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 6) {
-    /* Provide 6/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 6L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 6L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 6;
-    cinfo->min_DCT_v_scaled_size = 6;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 7) {
-    /* Provide 7/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 7L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 7L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 7;
-    cinfo->min_DCT_v_scaled_size = 7;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) {
-    /* Provide 8/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 8;
-    cinfo->min_DCT_v_scaled_size = 8;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 9) {
-    /* Provide 9/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 9L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 9L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 9;
-    cinfo->min_DCT_v_scaled_size = 9;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 10) {
-    /* Provide 10/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 10L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 10L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 10;
-    cinfo->min_DCT_v_scaled_size = 10;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 11) {
-    /* Provide 11/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 11L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 11L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 11;
-    cinfo->min_DCT_v_scaled_size = 11;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 12) {
-    /* Provide 12/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 12L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 12L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 12;
-    cinfo->min_DCT_v_scaled_size = 12;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 13) {
-    /* Provide 13/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 13L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 13L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 13;
-    cinfo->min_DCT_v_scaled_size = 13;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 14) {
-    /* Provide 14/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 14L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 14L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 14;
-    cinfo->min_DCT_v_scaled_size = 14;
-  } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 15) {
-    /* Provide 15/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 15L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 15L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 15;
-    cinfo->min_DCT_v_scaled_size = 15;
-  } else {
-    /* Provide 16/block_size scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * 16L, (long) cinfo->block_size);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * 16L, (long) cinfo->block_size);
-    cinfo->min_DCT_h_scaled_size = 16;
-    cinfo->min_DCT_v_scaled_size = 16;
-  }
-
-  /* Recompute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size;
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size;
-  }
-
-#else /* !IDCT_SCALING_SUPPORTED */
-
-  /* Hardwire it to "no scaling" */
-  cinfo->output_width = cinfo->image_width;
-  cinfo->output_height = cinfo->image_height;
-  /* initial_setup has already initialized DCT_scaled_size,
-   * and has computed unscaled downsampled_width and downsampled_height.
-   */
-
-#endif /* IDCT_SCALING_SUPPORTED */
-}
-
-
-LOCAL(void)
-initial_setup (j_decompress_ptr cinfo)
-/* Called once, when first SOS marker is reached */
-{
-  int ci;
-  jpeg_component_info *compptr;
-
-  /* Make sure image isn't bigger than I can handle */
-  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
-      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
-    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
-
-  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
-  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Check that number of components won't exceed internal array sizes */
-  if (cinfo->num_components > MAX_COMPONENTS)
-    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
-
-  /* Compute maximum sampling factors; check factor validity */
-  cinfo->max_h_samp_factor = 1;
-  cinfo->max_v_samp_factor = 1;
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
-      ERREXIT(cinfo, JERR_BAD_SAMPLING);
-    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
-    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
-  }
-
-  /* Derive block_size, natural_order, and lim_Se */
-  if (cinfo->is_baseline || (cinfo->progressive_mode &&
-      cinfo->comps_in_scan)) { /* no pseudo SOS marker */
-    cinfo->block_size = DCTSIZE;
-    cinfo->natural_order = jpeg_natural_order;
-    cinfo->lim_Se = DCTSIZE2-1;
-  } else
-    switch (cinfo->Se) {
-    case (1*1-1):
-      cinfo->block_size = 1;
-      cinfo->natural_order = jpeg_natural_order; /* not needed */
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (2*2-1):
-      cinfo->block_size = 2;
-      cinfo->natural_order = jpeg_natural_order2;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (3*3-1):
-      cinfo->block_size = 3;
-      cinfo->natural_order = jpeg_natural_order3;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (4*4-1):
-      cinfo->block_size = 4;
-      cinfo->natural_order = jpeg_natural_order4;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (5*5-1):
-      cinfo->block_size = 5;
-      cinfo->natural_order = jpeg_natural_order5;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (6*6-1):
-      cinfo->block_size = 6;
-      cinfo->natural_order = jpeg_natural_order6;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (7*7-1):
-      cinfo->block_size = 7;
-      cinfo->natural_order = jpeg_natural_order7;
-      cinfo->lim_Se = cinfo->Se;
-      break;
-    case (8*8-1):
-      cinfo->block_size = 8;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (9*9-1):
-      cinfo->block_size = 9;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (10*10-1):
-      cinfo->block_size = 10;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (11*11-1):
-      cinfo->block_size = 11;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (12*12-1):
-      cinfo->block_size = 12;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (13*13-1):
-      cinfo->block_size = 13;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (14*14-1):
-      cinfo->block_size = 14;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (15*15-1):
-      cinfo->block_size = 15;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    case (16*16-1):
-      cinfo->block_size = 16;
-      cinfo->natural_order = jpeg_natural_order;
-      cinfo->lim_Se = DCTSIZE2-1;
-      break;
-    default:
-      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	       cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
-    }
-
-  /* We initialize DCT_scaled_size and min_DCT_scaled_size to block_size.
-   * In the full decompressor,
-   * this will be overridden by jpeg_calc_output_dimensions in jdmaster.c;
-   * but in the transcoder,
-   * jpeg_calc_output_dimensions is not used, so we must do it here.
-   */
-  cinfo->min_DCT_h_scaled_size = cinfo->block_size;
-  cinfo->min_DCT_v_scaled_size = cinfo->block_size;
-
-  /* Compute dimensions of components */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    compptr->DCT_h_scaled_size = cinfo->block_size;
-    compptr->DCT_v_scaled_size = cinfo->block_size;
-    /* Size in DCT blocks */
-    compptr->width_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->height_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* downsampled_width and downsampled_height will also be overridden by
-     * jdmaster.c if we are doing full decompression.  The transcoder library
-     * doesn't use these values, but the calling application might.
-     */
-    /* Size in samples */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) cinfo->max_h_samp_factor);
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) cinfo->max_v_samp_factor);
-    /* Mark component needed, until color conversion says otherwise */
-    compptr->component_needed = TRUE;
-    /* Mark no quantization table yet saved for component */
-    compptr->quant_table = NULL;
-  }
-
-  /* Compute number of fully interleaved MCU rows. */
-  cinfo->total_iMCU_rows = (JDIMENSION)
-    jdiv_round_up((long) cinfo->image_height,
-	          (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-
-  /* Decide whether file contains multiple scans */
-  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
-    cinfo->inputctl->has_multiple_scans = TRUE;
-  else
-    cinfo->inputctl->has_multiple_scans = FALSE;
-}
-
-
-LOCAL(void)
-per_scan_setup (j_decompress_ptr cinfo)
-/* Do computations that are needed before processing a JPEG scan */
-/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
-{
-  int ci, mcublks, tmp;
-  jpeg_component_info *compptr;
-
-  if (cinfo->comps_in_scan == 1) {
-
-    /* Noninterleaved (single-component) scan */
-    compptr = cinfo->cur_comp_info[0];
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = compptr->width_in_blocks;
-    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-
-    /* For noninterleaved scan, always one block per MCU */
-    compptr->MCU_width = 1;
-    compptr->MCU_height = 1;
-    compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = compptr->DCT_h_scaled_size;
-    compptr->last_col_width = 1;
-    /* For noninterleaved scans, it is convenient to define last_row_height
-     * as the number of block rows present in the last iMCU row.
-     */
-    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
-    if (tmp == 0) tmp = compptr->v_samp_factor;
-    compptr->last_row_height = tmp;
-
-    /* Prepare array describing MCU composition */
-    cinfo->blocks_in_MCU = 1;
-    cinfo->MCU_membership[0] = 0;
-
-  } else {
-
-    /* Interleaved (multi-component) scan */
-    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-
-    /* Overall image size in MCUs */
-    cinfo->MCUs_per_row = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width,
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
-
-    cinfo->blocks_in_MCU = 0;
-
-    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-      compptr = cinfo->cur_comp_info[ci];
-      /* Sampling factors give # of blocks of component in each MCU */
-      compptr->MCU_width = compptr->h_samp_factor;
-      compptr->MCU_height = compptr->v_samp_factor;
-      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size;
-      /* Figure number of non-dummy blocks in last MCU column & row */
-      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
-      if (tmp == 0) tmp = compptr->MCU_width;
-      compptr->last_col_width = tmp;
-      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
-      if (tmp == 0) tmp = compptr->MCU_height;
-      compptr->last_row_height = tmp;
-      /* Prepare array describing MCU composition */
-      mcublks = compptr->MCU_blocks;
-      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
-      while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
-      }
-    }
-
-  }
-}
-
-
-/*
- * Save away a copy of the Q-table referenced by each component present
- * in the current scan, unless already saved during a prior scan.
- *
- * In a multiple-scan JPEG file, the encoder could assign different components
- * the same Q-table slot number, but change table definitions between scans
- * so that each component uses a different Q-table.  (The IJG encoder is not
- * currently capable of doing this, but other encoders might.)  Since we want
- * to be able to dequantize all the components at the end of the file, this
- * means that we have to save away the table actually used for each component.
- * We do this by copying the table at the start of the first scan containing
- * the component.
- * The JPEG spec prohibits the encoder from changing the contents of a Q-table
- * slot between scans of a component using that slot.  If the encoder does so
- * anyway, this decoder will simply use the Q-table values that were current
- * at the start of the first scan for the component.
- *
- * The decompressor output side looks only at the saved quant tables,
- * not at the current Q-table slots.
- */
-
-LOCAL(void)
-latch_quant_tables (j_decompress_ptr cinfo)
-{
-  int ci, qtblno;
-  jpeg_component_info *compptr;
-  JQUANT_TBL * qtbl;
-
-  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-    compptr = cinfo->cur_comp_info[ci];
-    /* No work if we already saved Q-table for this component */
-    if (compptr->quant_table != NULL)
-      continue;
-    /* Make sure specified quantization table is present */
-    qtblno = compptr->quant_tbl_no;
-    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
-      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
-    /* OK, save away the quantization table */
-    qtbl = (JQUANT_TBL *) (*cinfo->mem->alloc_small)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(JQUANT_TBL));
-    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
-    compptr->quant_table = qtbl;
-  }
-}
-
-
-/*
- * Initialize the input modules to read a scan of compressed data.
- * The first call to this is done by jdmaster.c after initializing
- * the entire decompressor (during jpeg_start_decompress).
- * Subsequent calls come from consume_markers, below.
- */
-
-METHODDEF(void)
-start_input_pass (j_decompress_ptr cinfo)
-{
-  per_scan_setup(cinfo);
-  latch_quant_tables(cinfo);
-  (*cinfo->entropy->start_pass) (cinfo);
-  (*cinfo->coef->start_input_pass) (cinfo);
-  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
-}
-
-
-/*
- * Finish up after inputting a compressed-data scan.
- * This is called by the coefficient controller after it's read all
- * the expected data of the scan.
- */
-
-METHODDEF(void)
-finish_input_pass (j_decompress_ptr cinfo)
-{
-  (*cinfo->entropy->finish_pass) (cinfo);
-  cinfo->inputctl->consume_input = consume_markers;
-}
-
-
-/*
- * Read JPEG markers before, between, or after compressed-data scans.
- * Change state as necessary when a new scan is reached.
- * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
- *
- * The consume_input method pointer points either here or to the
- * coefficient controller's consume_data routine, depending on whether
- * we are reading a compressed data segment or inter-segment markers.
- *
- * Note: This function should NOT return a pseudo SOS marker (with zero
- * component number) to the caller.  A pseudo marker received by
- * read_markers is processed and then skipped for other markers.
- */
-
-METHODDEF(int)
-consume_markers (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
-  int val;
-
-  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
-    return JPEG_REACHED_EOI;
-
-  for (;;) {			/* Loop to pass pseudo SOS marker */
-    val = (*cinfo->marker->read_markers) (cinfo);
-
-    switch (val) {
-    case JPEG_REACHED_SOS:	/* Found SOS */
-      if (inputctl->inheaders) { /* 1st SOS */
-	if (inputctl->inheaders == 1)
-	  initial_setup(cinfo);
-	if (cinfo->comps_in_scan == 0) { /* pseudo SOS marker */
-	  inputctl->inheaders = 2;
-	  break;
-	}
-	inputctl->inheaders = 0;
-	/* Note: start_input_pass must be called by jdmaster.c
-	 * before any more input can be consumed.  jdapimin.c is
-	 * responsible for enforcing this sequencing.
-	 */
-      } else {			/* 2nd or later SOS marker */
-	if (! inputctl->pub.has_multiple_scans)
-	  ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
-	if (cinfo->comps_in_scan == 0) /* unexpected pseudo SOS marker */
-	  break;
-	start_input_pass(cinfo);
-      }
-      return val;
-    case JPEG_REACHED_EOI:	/* Found EOI */
-      inputctl->pub.eoi_reached = TRUE;
-      if (inputctl->inheaders) { /* Tables-only datastream, apparently */
-	if (cinfo->marker->saw_SOF)
-	  ERREXIT(cinfo, JERR_SOF_NO_SOS);
-      } else {
-	/* Prevent infinite loop in coef ctlr's decompress_data routine
-	 * if user set output_scan_number larger than number of scans.
-	 */
-	if (cinfo->output_scan_number > cinfo->input_scan_number)
-	  cinfo->output_scan_number = cinfo->input_scan_number;
-      }
-      return val;
-    case JPEG_SUSPENDED:
-      return val;
-    default:
-      return val;
-    }
-  }
-}
-
-
-/*
- * Reset state to begin a fresh datastream.
- */
-
-METHODDEF(void)
-reset_input_controller (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl;
-
-  inputctl->pub.consume_input = consume_markers;
-  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
-  inputctl->pub.eoi_reached = FALSE;
-  inputctl->inheaders = 1;
-  /* Reset other modules */
-  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
-  (*cinfo->marker->reset_marker_reader) (cinfo);
-  /* Reset progression state -- would be cleaner if entropy decoder did this */
-  cinfo->coef_bits = NULL;
-}
-
-
-/*
- * Initialize the input controller module.
- * This is called only once, when the decompression object is created.
- */
-
-GLOBAL(void)
-jinit_input_controller (j_decompress_ptr cinfo)
-{
-  my_inputctl_ptr inputctl;
-
-  /* Create subobject in permanent pool */
-  inputctl = (my_inputctl_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_input_controller));
-  cinfo->inputctl = &inputctl->pub;
-  /* Initialize method pointers */
-  inputctl->pub.consume_input = consume_markers;
-  inputctl->pub.reset_input_controller = reset_input_controller;
-  inputctl->pub.start_input_pass = start_input_pass;
-  inputctl->pub.finish_input_pass = finish_input_pass;
-  /* Initialize state: can't use reset_input_controller since we don't
-   * want to try to reset other modules yet.
-   */
-  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
-  inputctl->pub.eoi_reached = FALSE;
-  inputctl->inheaders = 1;
-}
diff --git a/cmake/externals/libjpeg/jdmainct.c b/cmake/externals/libjpeg/jdmainct.c
deleted file mode 100644
index 1cd66d853..000000000
--- a/cmake/externals/libjpeg/jdmainct.c
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
- * jdmainct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the main buffer controller for decompression.
- * The main buffer lies between the JPEG decompressor proper and the
- * post-processor; it holds downsampled data in the JPEG colorspace.
- *
- * Note that this code is bypassed in raw-data mode, since the application
- * supplies the equivalent of the main buffer in that case.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * In the current system design, the main buffer need never be a full-image
- * buffer; any full-height buffers will be found inside the coefficient or
- * postprocessing controllers.  Nonetheless, the main controller is not
- * trivial.  Its responsibility is to provide context rows for upsampling/
- * rescaling, and doing this in an efficient fashion is a bit tricky.
- *
- * Postprocessor input data is counted in "row groups".  A row group is
- * defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
- * sample rows of each component.  (We require DCT_scaled_size values to be
- * chosen such that these numbers are integers.  In practice DCT_scaled_size
- * values will likely be powers of two, so we actually have the stronger
- * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
- * Upsampling will typically produce max_v_samp_factor pixel rows from each
- * row group (times any additional scale factor that the upsampler is
- * applying).
- *
- * The coefficient controller will deliver data to us one iMCU row at a time;
- * each iMCU row contains v_samp_factor * DCT_v_scaled_size sample rows, or
- * exactly min_DCT_v_scaled_size row groups.  (This amount of data corresponds
- * to one row of MCUs when the image is fully interleaved.)  Note that the
- * number of sample rows varies across components, but the number of row
- * groups does not.  Some garbage sample rows may be included in the last iMCU
- * row at the bottom of the image.
- *
- * Depending on the vertical scaling algorithm used, the upsampler may need
- * access to the sample row(s) above and below its current input row group.
- * The upsampler is required to set need_context_rows TRUE at global selection
- * time if so.  When need_context_rows is FALSE, this controller can simply
- * obtain one iMCU row at a time from the coefficient controller and dole it
- * out as row groups to the postprocessor.
- *
- * When need_context_rows is TRUE, this controller guarantees that the buffer
- * passed to postprocessing contains at least one row group's worth of samples
- * above and below the row group(s) being processed.  Note that the context
- * rows "above" the first passed row group appear at negative row offsets in
- * the passed buffer.  At the top and bottom of the image, the required
- * context rows are manufactured by duplicating the first or last real sample
- * row; this avoids having special cases in the upsampling inner loops.
- *
- * The amount of context is fixed at one row group just because that's a
- * convenient number for this controller to work with.  The existing
- * upsamplers really only need one sample row of context.  An upsampler
- * supporting arbitrary output rescaling might wish for more than one row
- * group of context when shrinking the image; tough, we don't handle that.
- * (This is justified by the assumption that downsizing will be handled mostly
- * by adjusting the DCT_scaled_size values, so that the actual scale factor at
- * the upsample step needn't be much less than one.)
- *
- * To provide the desired context, we have to retain the last two row groups
- * of one iMCU row while reading in the next iMCU row.  (The last row group
- * can't be processed until we have another row group for its below-context,
- * and so we have to save the next-to-last group too for its above-context.)
- * We could do this most simply by copying data around in our buffer, but
- * that'd be very slow.  We can avoid copying any data by creating a rather
- * strange pointer structure.  Here's how it works.  We allocate a workspace
- * consisting of M+2 row groups (where M = min_DCT_v_scaled_size is the number
- * of row groups per iMCU row).  We create two sets of redundant pointers to
- * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
- * pointer lists look like this:
- *                   M+1                          M-1
- * master pointer --> 0         master pointer --> 0
- *                    1                            1
- *                   ...                          ...
- *                   M-3                          M-3
- *                   M-2                           M
- *                   M-1                          M+1
- *                    M                           M-2
- *                   M+1                          M-1
- *                    0                            0
- * We read alternate iMCU rows using each master pointer; thus the last two
- * row groups of the previous iMCU row remain un-overwritten in the workspace.
- * The pointer lists are set up so that the required context rows appear to
- * be adjacent to the proper places when we pass the pointer lists to the
- * upsampler.
- *
- * The above pictures describe the normal state of the pointer lists.
- * At top and bottom of the image, we diddle the pointer lists to duplicate
- * the first or last sample row as necessary (this is cheaper than copying
- * sample rows around).
- *
- * This scheme breaks down if M < 2, ie, min_DCT_v_scaled_size is 1.  In that
- * situation each iMCU row provides only one row group so the buffering logic
- * must be different (eg, we must read two iMCU rows before we can emit the
- * first row group).  For now, we simply do not support providing context
- * rows when min_DCT_v_scaled_size is 1.  That combination seems unlikely to
- * be worth providing --- if someone wants a 1/8th-size preview, they probably
- * want it quick and dirty, so a context-free upsampler is sufficient.
- */
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_main_controller pub; /* public fields */
-
-  /* Pointer to allocated workspace (M or M+2 row groups). */
-  JSAMPARRAY buffer[MAX_COMPONENTS];
-
-  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
-  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
-
-  /* Remaining fields are only used in the context case. */
-
-  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
-
-  /* These are the master pointers to the funny-order pointer lists. */
-  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
-
-  int whichptr;			/* indicates which pointer set is now in use */
-  int context_state;		/* process_data state machine status */
-  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
-} my_main_controller;
-
-typedef my_main_controller * my_main_ptr;
-
-/* context_state values: */
-#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
-#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
-#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
-
-
-/* Forward declarations */
-METHODDEF(void) process_data_simple_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-METHODDEF(void) process_data_context_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-#ifdef QUANT_2PASS_SUPPORTED
-METHODDEF(void) process_data_crank_post
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
-#endif
-
-
-LOCAL(void)
-alloc_funny_pointers (j_decompress_ptr cinfo)
-/* Allocate space for the funny pointer lists.
- * This is done only once, not once per pass.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
-
-  /* Get top-level space for component array pointers.
-   * We alloc both arrays with one call to save a few cycles.
-   */
-  mainp->xbuffer[0] = (JSAMPIMAGE) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
-  mainp->xbuffer[1] = mainp->xbuffer[0] + cinfo->num_components;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    /* Get space for pointer lists --- M+4 row groups in each list.
-     * We alloc both pointer lists with one call to save a few cycles.
-     */
-    xbuf = (JSAMPARRAY) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
-      JPOOL_IMAGE, 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
-    xbuf += rgroup;		/* want one row group at negative offsets */
-    mainp->xbuffer[0][ci] = xbuf;
-    xbuf += rgroup * (M + 4);
-    mainp->xbuffer[1][ci] = xbuf;
-  }
-}
-
-
-LOCAL(void)
-make_funny_pointers (j_decompress_ptr cinfo)
-/* Create the funny pointer lists discussed in the comments above.
- * The actual workspace is already allocated (in mainp->buffer),
- * and the space for the pointer lists is allocated too.
- * This routine just fills in the curiously ordered lists.
- * This will be repeated at the beginning of each pass.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY buf, xbuf0, xbuf1;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    xbuf0 = mainp->xbuffer[0][ci];
-    xbuf1 = mainp->xbuffer[1][ci];
-    /* First copy the workspace pointers as-is */
-    buf = mainp->buffer[ci];
-    for (i = 0; i < rgroup * (M + 2); i++) {
-      xbuf0[i] = xbuf1[i] = buf[i];
-    }
-    /* In the second list, put the last four row groups in swapped order */
-    for (i = 0; i < rgroup * 2; i++) {
-      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
-      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
-    }
-    /* The wraparound pointers at top and bottom will be filled later
-     * (see set_wraparound_pointers, below).  Initially we want the "above"
-     * pointers to duplicate the first actual data line.  This only needs
-     * to happen in xbuffer[0].
-     */
-    for (i = 0; i < rgroup; i++) {
-      xbuf0[i - rgroup] = xbuf0[0];
-    }
-  }
-}
-
-
-LOCAL(void)
-set_wraparound_pointers (j_decompress_ptr cinfo)
-/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
- * This changes the pointer list state from top-of-image to the normal state.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup;
-  int M = cinfo->min_DCT_v_scaled_size;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf0, xbuf1;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    xbuf0 = mainp->xbuffer[0][ci];
-    xbuf1 = mainp->xbuffer[1][ci];
-    for (i = 0; i < rgroup; i++) {
-      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
-      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
-      xbuf0[rgroup*(M+2) + i] = xbuf0[i];
-      xbuf1[rgroup*(M+2) + i] = xbuf1[i];
-    }
-  }
-}
-
-
-LOCAL(void)
-set_bottom_pointers (j_decompress_ptr cinfo)
-/* Change the pointer lists to duplicate the last sample row at the bottom
- * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
- * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
- */
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-  int ci, i, rgroup, iMCUheight, rows_left;
-  jpeg_component_info *compptr;
-  JSAMPARRAY xbuf;
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    /* Count sample rows in one iMCU row and in one row group */
-    iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size;
-    rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size;
-    /* Count nondummy sample rows remaining for this component */
-    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
-    if (rows_left == 0) rows_left = iMCUheight;
-    /* Count nondummy row groups.  Should get same answer for each component,
-     * so we need only do it once.
-     */
-    if (ci == 0) {
-      mainp->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
-    }
-    /* Duplicate the last real sample row rgroup*2 times; this pads out the
-     * last partial rowgroup and ensures at least one full rowgroup of context.
-     */
-    xbuf = mainp->xbuffer[mainp->whichptr][ci];
-    for (i = 0; i < rgroup * 2; i++) {
-      xbuf[rows_left + i] = xbuf[rows_left-1];
-    }
-  }
-}
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (cinfo->upsample->need_context_rows) {
-      mainp->pub.process_data = process_data_context_main;
-      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
-      mainp->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
-      mainp->context_state = CTX_PREPARE_FOR_IMCU;
-      mainp->iMCU_row_ctr = 0;
-      mainp->buffer_full = FALSE; /* Mark buffer empty */
-    } else {
-      /* Simple case with no context needed */
-      mainp->pub.process_data = process_data_simple_main;
-      mainp->rowgroup_ctr = mainp->rowgroups_avail; /* Mark buffer empty */
-    }
-    break;
-#ifdef QUANT_2PASS_SUPPORTED
-  case JBUF_CRANK_DEST:
-    /* For last pass of 2-pass quantization, just crank the postprocessor */
-    mainp->pub.process_data = process_data_crank_post;
-    break;
-#endif
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-  }
-}
-
-
-/*
- * Process some data.
- * This handles the simple case where no context is required.
- */
-
-METHODDEF(void)
-process_data_simple_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-			  JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  /* Read input data if we haven't filled the main buffer yet */
-  if (mainp->rowgroup_ctr >= mainp->rowgroups_avail) {
-    if (! (*cinfo->coef->decompress_data) (cinfo, mainp->buffer))
-      return;			/* suspension forced, can do nothing more */
-    mainp->rowgroup_ctr = 0;	/* OK, we have an iMCU row to work with */
-  }
-
-  /* Note: at the bottom of the image, we may pass extra garbage row groups
-   * to the postprocessor.  The postprocessor has to check for bottom
-   * of image anyway (at row resolution), so no point in us doing it too.
-   */
-
-  /* Feed the postprocessor */
-  (*cinfo->post->post_process_data) (cinfo, mainp->buffer,
-			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-}
-
-
-/*
- * Process some data.
- * This handles the case where context rows must be provided.
- */
-
-METHODDEF(void)
-process_data_context_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-			   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
-{
-  my_main_ptr mainp = (my_main_ptr) cinfo->main;
-
-  /* Read input data if we haven't filled the main buffer yet */
-  if (! mainp->buffer_full) {
-    if (! (*cinfo->coef->decompress_data) (cinfo,
-					   mainp->xbuffer[mainp->whichptr]))
-      return;			/* suspension forced, can do nothing more */
-    mainp->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
-    mainp->iMCU_row_ctr++;	/* count rows received */
-  }
-
-  /* Postprocessor typically will not swallow all the input data it is handed
-   * in one call (due to filling the output buffer first).  Must be prepared
-   * to exit and restart.  This switch lets us keep track of how far we got.
-   * Note that each case falls through to the next on successful completion.
-   */
-  switch (mainp->context_state) {
-  case CTX_POSTPONED_ROW:
-    /* Call postprocessor using previously set pointers for postponed row */
-    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
-			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
-      return;			/* Need to suspend */
-    mainp->context_state = CTX_PREPARE_FOR_IMCU;
-    if (*out_row_ctr >= out_rows_avail)
-      return;			/* Postprocessor exactly filled output buf */
-    /*FALLTHROUGH*/
-  case CTX_PREPARE_FOR_IMCU:
-    /* Prepare to process first M-1 row groups of this iMCU row */
-    mainp->rowgroup_ctr = 0;
-    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1);
-    /* Check for bottom of image: if so, tweak pointers to "duplicate"
-     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
-     */
-    if (mainp->iMCU_row_ctr == cinfo->total_iMCU_rows)
-      set_bottom_pointers(cinfo);
-    mainp->context_state = CTX_PROCESS_IMCU;
-    /*FALLTHROUGH*/
-  case CTX_PROCESS_IMCU:
-    /* Call postprocessor using previously set pointers */
-    (*cinfo->post->post_process_data) (cinfo, mainp->xbuffer[mainp->whichptr],
-			&mainp->rowgroup_ctr, mainp->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (mainp->rowgroup_ctr < mainp->rowgroups_avail)
-      return;			/* Need to suspend */
-    /* After the first iMCU, change wraparound pointers to normal state */
-    if (mainp->iMCU_row_ctr == 1)
-      set_wraparound_pointers(cinfo);
-    /* Prepare to load new iMCU row using other xbuffer list */
-    mainp->whichptr ^= 1;	/* 0=>1 or 1=>0 */
-    mainp->buffer_full = FALSE;
-    /* Still need to process last row group of this iMCU row, */
-    /* which is saved at index M+1 of the other xbuffer */
-    mainp->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1);
-    mainp->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2);
-    mainp->context_state = CTX_POSTPONED_ROW;
-  }
-}
-
-
-/*
- * Process some data.
- * Final pass of two-pass quantization: just call the postprocessor.
- * Source data will be the postprocessor controller's internal buffer.
- */
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-METHODDEF(void)
-process_data_crank_post (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-			 JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
-{
-  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
-			(JDIMENSION *) NULL, (JDIMENSION) 0,
-			output_buf, out_row_ctr, out_rows_avail);
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
-
-
-/*
- * Initialize main buffer controller.
- */
-
-GLOBAL(void)
-jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_main_ptr mainp;
-  int ci, rgroup, ngroups;
-  jpeg_component_info *compptr;
-
-  mainp = (my_main_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_main_controller));
-  cinfo->main = &mainp->pub;
-  mainp->pub.start_pass = start_pass_main;
-
-  if (need_full_buffer)		/* shouldn't happen */
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-
-  /* Allocate the workspace.
-   * ngroups is the number of row groups we need.
-   */
-  if (cinfo->upsample->need_context_rows) {
-    if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */
-      ERREXIT(cinfo, JERR_NOTIMPL);
-    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
-    ngroups = cinfo->min_DCT_v_scaled_size + 2;
-  } else {
-    /* There are always min_DCT_v_scaled_size row groups in an iMCU row. */
-    ngroups = cinfo->min_DCT_v_scaled_size;
-    mainp->rowgroups_avail = (JDIMENSION) ngroups;
-  }
-
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    if (! compptr->component_needed)
-      continue;			/* skip uninteresting component */
-    rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-      cinfo->min_DCT_v_scaled_size; /* height of a row group of component */
-    mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       compptr->width_in_blocks * ((JDIMENSION) compptr->DCT_h_scaled_size),
-       (JDIMENSION) (rgroup * ngroups));
-  }
-}
diff --git a/cmake/externals/libjpeg/jdmarker.c b/cmake/externals/libjpeg/jdmarker.c
deleted file mode 100644
index c10fde605..000000000
--- a/cmake/externals/libjpeg/jdmarker.c
+++ /dev/null
@@ -1,1505 +0,0 @@
-/*
- * jdmarker.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2009-2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains routines to decode JPEG datastream markers.
- * Most of the complexity arises from our desire to support input
- * suspension: if not all of the data for a marker is available,
- * we must exit back to the application.  On resumption, we reprocess
- * the marker.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-typedef enum {			/* JPEG marker codes */
-  M_SOF0  = 0xc0,
-  M_SOF1  = 0xc1,
-  M_SOF2  = 0xc2,
-  M_SOF3  = 0xc3,
-
-  M_SOF5  = 0xc5,
-  M_SOF6  = 0xc6,
-  M_SOF7  = 0xc7,
-
-  M_JPG   = 0xc8,
-  M_SOF9  = 0xc9,
-  M_SOF10 = 0xca,
-  M_SOF11 = 0xcb,
-
-  M_SOF13 = 0xcd,
-  M_SOF14 = 0xce,
-  M_SOF15 = 0xcf,
-
-  M_DHT   = 0xc4,
-
-  M_DAC   = 0xcc,
-
-  M_RST0  = 0xd0,
-  M_RST1  = 0xd1,
-  M_RST2  = 0xd2,
-  M_RST3  = 0xd3,
-  M_RST4  = 0xd4,
-  M_RST5  = 0xd5,
-  M_RST6  = 0xd6,
-  M_RST7  = 0xd7,
-
-  M_SOI   = 0xd8,
-  M_EOI   = 0xd9,
-  M_SOS   = 0xda,
-  M_DQT   = 0xdb,
-  M_DNL   = 0xdc,
-  M_DRI   = 0xdd,
-  M_DHP   = 0xde,
-  M_EXP   = 0xdf,
-
-  M_APP0  = 0xe0,
-  M_APP1  = 0xe1,
-  M_APP2  = 0xe2,
-  M_APP3  = 0xe3,
-  M_APP4  = 0xe4,
-  M_APP5  = 0xe5,
-  M_APP6  = 0xe6,
-  M_APP7  = 0xe7,
-  M_APP8  = 0xe8,
-  M_APP9  = 0xe9,
-  M_APP10 = 0xea,
-  M_APP11 = 0xeb,
-  M_APP12 = 0xec,
-  M_APP13 = 0xed,
-  M_APP14 = 0xee,
-  M_APP15 = 0xef,
-
-  M_JPG0  = 0xf0,
-  M_JPG8  = 0xf8,
-  M_JPG13 = 0xfd,
-  M_COM   = 0xfe,
-
-  M_TEM   = 0x01,
-
-  M_ERROR = 0x100
-} JPEG_MARKER;
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_marker_reader pub; /* public fields */
-
-  /* Application-overridable marker processing methods */
-  jpeg_marker_parser_method process_COM;
-  jpeg_marker_parser_method process_APPn[16];
-
-  /* Limit on marker data length to save for each marker type */
-  unsigned int length_limit_COM;
-  unsigned int length_limit_APPn[16];
-
-  /* Status of COM/APPn marker saving */
-  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
-  unsigned int bytes_read;		/* data bytes read so far in marker */
-  /* Note: cur_marker is not linked into marker_list until it's all read. */
-} my_marker_reader;
-
-typedef my_marker_reader * my_marker_ptr;
-
-
-/*
- * Macros for fetching data from the data source module.
- *
- * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
- * the current restart point; we update them only when we have reached a
- * suitable place to restart if a suspension occurs.
- */
-
-/* Declare and initialize local copies of input pointer/count */
-#define INPUT_VARS(cinfo)  \
-	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
-	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
-	size_t bytes_in_buffer = datasrc->bytes_in_buffer
-
-/* Unload the local copies --- do this only at a restart boundary */
-#define INPUT_SYNC(cinfo)  \
-	( datasrc->next_input_byte = next_input_byte,  \
-	  datasrc->bytes_in_buffer = bytes_in_buffer )
-
-/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
-#define INPUT_RELOAD(cinfo)  \
-	( next_input_byte = datasrc->next_input_byte,  \
-	  bytes_in_buffer = datasrc->bytes_in_buffer )
-
-/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
- * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
- * but we must reload the local copies after a successful fill.
- */
-#define MAKE_BYTE_AVAIL(cinfo,action)  \
-	if (bytes_in_buffer == 0) {  \
-	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
-	    { action; }  \
-	  INPUT_RELOAD(cinfo);  \
-	}
-
-/* Read a byte into variable V.
- * If must suspend, take the specified action (typically "return FALSE").
- */
-#define INPUT_BYTE(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = GETJOCTET(*next_input_byte++); )
-
-/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
- * V should be declared unsigned int or perhaps INT32.
- */
-#define INPUT_2BYTES(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
-		  MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V += GETJOCTET(*next_input_byte++); )
-
-
-/*
- * Routines to process JPEG markers.
- *
- * Entry condition: JPEG marker itself has been read and its code saved
- *   in cinfo->unread_marker; input restart point is just after the marker.
- *
- * Exit: if return TRUE, have read and processed any parameters, and have
- *   updated the restart point to point after the parameters.
- *   If return FALSE, was forced to suspend before reaching end of
- *   marker parameters; restart point has not been moved.  Same routine
- *   will be called again after application supplies more input data.
- *
- * This approach to suspension assumes that all of a marker's parameters
- * can fit into a single input bufferload.  This should hold for "normal"
- * markers.  Some COM/APPn markers might have large parameter segments
- * that might not fit.  If we are simply dropping such a marker, we use
- * skip_input_data to get past it, and thereby put the problem on the
- * source manager's shoulders.  If we are saving the marker's contents
- * into memory, we use a slightly different convention: when forced to
- * suspend, the marker processor updates the restart point to the end of
- * what it's consumed (ie, the end of the buffer) before returning FALSE.
- * On resumption, cinfo->unread_marker still contains the marker code,
- * but the data source will point to the next chunk of marker data.
- * The marker processor must retain internal state to deal with this.
- *
- * Note that we don't bother to avoid duplicate trace messages if a
- * suspension occurs within marker parameters.  Other side effects
- * require more care.
- */
-
-
-LOCAL(boolean)
-get_soi (j_decompress_ptr cinfo)
-/* Process an SOI marker */
-{
-  int i;
-  
-  TRACEMS(cinfo, 1, JTRC_SOI);
-
-  if (cinfo->marker->saw_SOI)
-    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
-
-  /* Reset all parameters that are defined to be reset by SOI */
-
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    cinfo->arith_dc_L[i] = 0;
-    cinfo->arith_dc_U[i] = 1;
-    cinfo->arith_ac_K[i] = 5;
-  }
-  cinfo->restart_interval = 0;
-
-  /* Set initial assumptions for colorspace etc */
-
-  cinfo->jpeg_color_space = JCS_UNKNOWN;
-  cinfo->color_transform = JCT_NONE;
-  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
-
-  cinfo->saw_JFIF_marker = FALSE;
-  cinfo->JFIF_major_version = 1; /* set default JFIF APP0 values */
-  cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;
-  cinfo->X_density = 1;
-  cinfo->Y_density = 1;
-  cinfo->saw_Adobe_marker = FALSE;
-  cinfo->Adobe_transform = 0;
-
-  cinfo->marker->saw_SOI = TRUE;
-
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_sof (j_decompress_ptr cinfo, boolean is_baseline, boolean is_prog,
-	 boolean is_arith)
-/* Process a SOFn marker */
-{
-  INT32 length;
-  int c, ci, i;
-  jpeg_component_info * compptr;
-  INPUT_VARS(cinfo);
-
-  cinfo->is_baseline = is_baseline;
-  cinfo->progressive_mode = is_prog;
-  cinfo->arith_code = is_arith;
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
-  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
-  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
-  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
-
-  length -= 8;
-
-  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
-	   (int) cinfo->image_width, (int) cinfo->image_height,
-	   cinfo->num_components);
-
-  if (cinfo->marker->saw_SOF)
-    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
-
-  /* We don't support files in which the image height is initially specified */
-  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
-  /* might as well have a general sanity check. */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
-      cinfo->num_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  if (length != (cinfo->num_components * 3))
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
-    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 cinfo->num_components * SIZEOF(jpeg_component_info));
-
-  for (ci = 0; ci < cinfo->num_components; ci++) {
-    INPUT_BYTE(cinfo, c, return FALSE);
-    /* Check to see whether component id has already been seen   */
-    /* (in violation of the spec, but unfortunately seen in some */
-    /* files).  If so, create "fake" component id equal to the   */
-    /* max id seen so far + 1. */
-    for (i = 0, compptr = cinfo->comp_info; i < ci; i++, compptr++) {
-      if (c == compptr->component_id) {
-	compptr = cinfo->comp_info;
-	c = compptr->component_id;
-	compptr++;
-	for (i = 1; i < ci; i++, compptr++) {
-	  if (compptr->component_id > c) c = compptr->component_id;
-	}
-	c++;
-	break;
-      }
-    }
-    compptr->component_id = c;
-    compptr->component_index = ci;
-    INPUT_BYTE(cinfo, c, return FALSE);
-    compptr->h_samp_factor = (c >> 4) & 15;
-    compptr->v_samp_factor = (c     ) & 15;
-    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
-
-    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
-	     compptr->component_id, compptr->h_samp_factor,
-	     compptr->v_samp_factor, compptr->quant_tbl_no);
-  }
-
-  cinfo->marker->saw_SOF = TRUE;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_sos (j_decompress_ptr cinfo)
-/* Process a SOS marker */
-{
-  INT32 length;
-  int c, ci, i, n;
-  jpeg_component_info * compptr;
-  INPUT_VARS(cinfo);
-
-  if (! cinfo->marker->saw_SOF)
-    ERREXITS(cinfo, JERR_SOF_BEFORE, "SOS");
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
-
-  TRACEMS1(cinfo, 1, JTRC_SOS, n);
-
-  if (length != (n * 2 + 6) || n > MAX_COMPS_IN_SCAN ||
-      (n == 0 && !cinfo->progressive_mode))
-      /* pseudo SOS marker only allowed in progressive mode */
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  cinfo->comps_in_scan = n;
-
-  /* Collect the component-spec parameters */
-
-  for (i = 0; i < n; i++) {
-    INPUT_BYTE(cinfo, c, return FALSE);
-
-    /* Detect the case where component id's are not unique, and, if so, */
-    /* create a fake component id using the same logic as in get_sof.   */
-    /* Note:  This also ensures that all of the SOF components are      */
-    /* referenced in the single scan case, which prevents access to     */
-    /* uninitialized memory in later decoding stages. */
-    for (ci = 0; ci < i; ci++) {
-      if (c == cinfo->cur_comp_info[ci]->component_id) {
-	c = cinfo->cur_comp_info[0]->component_id;
-	for (ci = 1; ci < i; ci++) {
-	  compptr = cinfo->cur_comp_info[ci];
-	  if (compptr->component_id > c) c = compptr->component_id;
-	}
-	c++;
-	break;
-      }
-    }
-
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (c == compptr->component_id)
-	goto id_found;
-    }
-
-    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, c);
-
-  id_found:
-
-    cinfo->cur_comp_info[i] = compptr;
-    INPUT_BYTE(cinfo, c, return FALSE);
-    compptr->dc_tbl_no = (c >> 4) & 15;
-    compptr->ac_tbl_no = (c     ) & 15;
-
-    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, compptr->component_id,
-	     compptr->dc_tbl_no, compptr->ac_tbl_no);
-  }
-
-  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Ss = c;
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Se = c;
-  INPUT_BYTE(cinfo, c, return FALSE);
-  cinfo->Ah = (c >> 4) & 15;
-  cinfo->Al = (c     ) & 15;
-
-  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
-	   cinfo->Ah, cinfo->Al);
-
-  /* Prepare to scan data & restart markers */
-  cinfo->marker->next_restart_num = 0;
-
-  /* Count another (non-pseudo) SOS marker */
-  if (n) cinfo->input_scan_number++;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-#ifdef D_ARITH_CODING_SUPPORTED
-
-LOCAL(boolean)
-get_dac (j_decompress_ptr cinfo)
-/* Process a DAC marker */
-{
-  INT32 length;
-  int index, val;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  while (length > 0) {
-    INPUT_BYTE(cinfo, index, return FALSE);
-    INPUT_BYTE(cinfo, val, return FALSE);
-
-    length -= 2;
-
-    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
-
-    if (index < 0 || index >= (2*NUM_ARITH_TBLS))
-      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
-
-    if (index >= NUM_ARITH_TBLS) { /* define AC table */
-      cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
-    } else {			/* define DC table */
-      cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
-      cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
-      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
-	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
-    }
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-#else /* ! D_ARITH_CODING_SUPPORTED */
-
-#define get_dac(cinfo)  skip_variable(cinfo)
-
-#endif /* D_ARITH_CODING_SUPPORTED */
-
-
-LOCAL(boolean)
-get_dht (j_decompress_ptr cinfo)
-/* Process a DHT marker */
-{
-  INT32 length;
-  UINT8 bits[17];
-  UINT8 huffval[256];
-  int i, index, count;
-  JHUFF_TBL **htblptr;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  while (length > 16) {
-    INPUT_BYTE(cinfo, index, return FALSE);
-
-    TRACEMS1(cinfo, 1, JTRC_DHT, index);
-      
-    bits[0] = 0;
-    count = 0;
-    for (i = 1; i <= 16; i++) {
-      INPUT_BYTE(cinfo, bits[i], return FALSE);
-      count += bits[i];
-    }
-
-    length -= 1 + 16;
-
-    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[1], bits[2], bits[3], bits[4],
-	     bits[5], bits[6], bits[7], bits[8]);
-    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[9], bits[10], bits[11], bits[12],
-	     bits[13], bits[14], bits[15], bits[16]);
-
-    /* Here we just do minimal validation of the counts to avoid walking
-     * off the end of our table space.  jdhuff.c will check more carefully.
-     */
-    if (count > 256 || ((INT32) count) > length)
-      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
-    for (i = 0; i < count; i++)
-      INPUT_BYTE(cinfo, huffval[i], return FALSE);
-
-    length -= count;
-
-    if (index & 0x10) {		/* AC table definition */
-      index -= 0x10;
-      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
-    } else {			/* DC table definition */
-      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
-    }
-
-    if (index < 0 || index >= NUM_HUFF_TBLS)
-      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
-
-    if (*htblptr == NULL)
-      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-  
-    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
-    if (count > 0)
-      MEMCOPY((*htblptr)->huffval, huffval, count * SIZEOF(UINT8));
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_dqt (j_decompress_ptr cinfo)
-/* Process a DQT marker */
-{
-  INT32 length, count, i;
-  int n, prec;
-  unsigned int tmp;
-  JQUANT_TBL *quant_ptr;
-  const int *natural_order;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-
-  while (length > 0) {
-    length--;
-    INPUT_BYTE(cinfo, n, return FALSE);
-    prec = n >> 4;
-    n &= 0x0F;
-
-    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
-
-    if (n >= NUM_QUANT_TBLS)
-      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
-      
-    if (cinfo->quant_tbl_ptrs[n] == NULL)
-      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
-    quant_ptr = cinfo->quant_tbl_ptrs[n];
-
-    if (prec) {
-      if (length < DCTSIZE2 * 2) {
-	/* Initialize full table for safety. */
-	for (i = 0; i < DCTSIZE2; i++) {
-	  quant_ptr->quantval[i] = 1;
-	}
-	count = length >> 1;
-      } else
-	count = DCTSIZE2;
-    } else {
-      if (length < DCTSIZE2) {
-	/* Initialize full table for safety. */
-	for (i = 0; i < DCTSIZE2; i++) {
-	  quant_ptr->quantval[i] = 1;
-	}
-	count = length;
-      } else
-	count = DCTSIZE2;
-    }
-
-    switch ((int) count) {
-    case (2*2): natural_order = jpeg_natural_order2; break;
-    case (3*3): natural_order = jpeg_natural_order3; break;
-    case (4*4): natural_order = jpeg_natural_order4; break;
-    case (5*5): natural_order = jpeg_natural_order5; break;
-    case (6*6): natural_order = jpeg_natural_order6; break;
-    case (7*7): natural_order = jpeg_natural_order7; break;
-    default:    natural_order = jpeg_natural_order;
-    }
-
-    for (i = 0; i < count; i++) {
-      if (prec)
-	INPUT_2BYTES(cinfo, tmp, return FALSE);
-      else
-	INPUT_BYTE(cinfo, tmp, return FALSE);
-      /* We convert the zigzag-order table to natural array order. */
-      quant_ptr->quantval[natural_order[i]] = (UINT16) tmp;
-    }
-
-    if (cinfo->err->trace_level >= 2) {
-      for (i = 0; i < DCTSIZE2; i += 8) {
-	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
-		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
-		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
-		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
-		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
-      }
-    }
-
-    length -= count;
-    if (prec) length -= count;
-  }
-
-  if (length != 0)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_dri (j_decompress_ptr cinfo)
-/* Process a DRI marker */
-{
-  INT32 length;
-  unsigned int tmp;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  
-  if (length != 4)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-
-  TRACEMS1(cinfo, 1, JTRC_DRI, tmp);
-
-  cinfo->restart_interval = tmp;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-get_lse (j_decompress_ptr cinfo)
-/* Process an LSE marker */
-{
-  INT32 length;
-  unsigned int tmp;
-  int cid;
-  INPUT_VARS(cinfo);
-
-  if (! cinfo->marker->saw_SOF)
-    ERREXITS(cinfo, JERR_SOF_BEFORE, "LSE");
-
-  if (cinfo->num_components < 3) goto bad;
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-
-  if (length != 24)
-    ERREXIT(cinfo, JERR_BAD_LENGTH);
-
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0x0D)	/* ID inverse transform specification */
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != MAXJSAMPLE) goto bad;		/* MAXTRANS */
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 3) goto bad;			/* Nt=3 */
-  INPUT_BYTE(cinfo, cid, return FALSE);
-  if (cid != cinfo->comp_info[1].component_id) goto bad;
-  INPUT_BYTE(cinfo, cid, return FALSE);
-  if (cid != cinfo->comp_info[0].component_id) goto bad;
-  INPUT_BYTE(cinfo, cid, return FALSE);
-  if (cid != cinfo->comp_info[2].component_id) goto bad;
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0x80) goto bad;		/* F1: CENTER1=1, NORM1=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;			/* A(1,1)=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;			/* A(1,2)=0 */
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;		/* F2: CENTER2=0, NORM2=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 1) goto bad;			/* A(2,1)=1 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;			/* A(2,2)=0 */
-  INPUT_BYTE(cinfo, tmp, return FALSE);
-  if (tmp != 0) goto bad;		/* F3: CENTER3=0, NORM3=0 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 1) goto bad;			/* A(3,1)=1 */
-  INPUT_2BYTES(cinfo, tmp, return FALSE);
-  if (tmp != 0) {				/* A(3,2)=0 */
-    bad:
-    ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-  }
-
-  /* OK, valid transform that we can handle. */
-  cinfo->color_transform = JCT_SUBTRACT_GREEN;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Routines for processing APPn and COM markers.
- * These are either saved in memory or discarded, per application request.
- * APP0 and APP14 are specially checked to see if they are
- * JFIF and Adobe markers, respectively.
- */
-
-#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
-#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
-#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
-
-
-LOCAL(void)
-examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	      unsigned int datalen, INT32 remaining)
-/* Examine first few bytes from an APP0.
- * Take appropriate action if it is a JFIF marker.
- * datalen is # of bytes at data[], remaining is length of rest of marker data.
- */
-{
-  INT32 totallen = (INT32) datalen + remaining;
-
-  if (datalen >= APP0_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
-      GETJOCTET(data[2]) == 0x49 &&
-      GETJOCTET(data[3]) == 0x46 &&
-      GETJOCTET(data[4]) == 0) {
-    /* Found JFIF APP0 marker: save info */
-    cinfo->saw_JFIF_marker = TRUE;
-    cinfo->JFIF_major_version = GETJOCTET(data[5]);
-    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
-    cinfo->density_unit = GETJOCTET(data[7]);
-    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
-    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
-    /* Check version.
-     * Major version must be 1 or 2, anything else signals an incompatible
-     * change.
-     * (We used to treat this as an error, but now it's a nonfatal warning,
-     * because some bozo at Hijaak couldn't read the spec.)
-     * Minor version should be 0..2, but process anyway if newer.
-     */
-    if (cinfo->JFIF_major_version != 1 && cinfo->JFIF_major_version != 2)
-      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
-	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
-    /* Generate trace messages */
-    TRACEMS5(cinfo, 1, JTRC_JFIF,
-	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
-	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
-    /* Validate thumbnail dimensions and issue appropriate messages */
-    if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
-      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
-	       GETJOCTET(data[12]), GETJOCTET(data[13]));
-    totallen -= APP0_DATA_LEN;
-    if (totallen !=
-	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
-      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
-  } else if (datalen >= 6 &&
-      GETJOCTET(data[0]) == 0x4A &&
-      GETJOCTET(data[1]) == 0x46 &&
-      GETJOCTET(data[2]) == 0x58 &&
-      GETJOCTET(data[3]) == 0x58 &&
-      GETJOCTET(data[4]) == 0) {
-    /* Found JFIF "JFXX" extension APP0 marker */
-    /* The library doesn't actually do anything with these,
-     * but we try to produce a helpful trace message.
-     */
-    switch (GETJOCTET(data[5])) {
-    case 0x10:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
-      break;
-    case 0x11:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
-      break;
-    case 0x13:
-      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
-      break;
-    default:
-      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
-	       GETJOCTET(data[5]), (int) totallen);
-    }
-  } else {
-    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
-    TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
-  }
-}
-
-
-LOCAL(void)
-examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	       unsigned int datalen, INT32 remaining)
-/* Examine first few bytes from an APP14.
- * Take appropriate action if it is an Adobe marker.
- * datalen is # of bytes at data[], remaining is length of rest of marker data.
- */
-{
-  unsigned int version, flags0, flags1, transform;
-
-  if (datalen >= APP14_DATA_LEN &&
-      GETJOCTET(data[0]) == 0x41 &&
-      GETJOCTET(data[1]) == 0x64 &&
-      GETJOCTET(data[2]) == 0x6F &&
-      GETJOCTET(data[3]) == 0x62 &&
-      GETJOCTET(data[4]) == 0x65) {
-    /* Found Adobe APP14 marker */
-    version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
-    flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
-    flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
-    transform = GETJOCTET(data[11]);
-    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
-    cinfo->saw_Adobe_marker = TRUE;
-    cinfo->Adobe_transform = (UINT8) transform;
-  } else {
-    /* Start of APP14 does not match "Adobe", or too short */
-    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
-  }
-}
-
-
-METHODDEF(boolean)
-get_interesting_appn (j_decompress_ptr cinfo)
-/* Process an APP0 or APP14 marker without saving it */
-{
-  INT32 length;
-  JOCTET b[APPN_DATA_LEN];
-  unsigned int i, numtoread;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-
-  /* get the interesting part of the marker data */
-  if (length >= APPN_DATA_LEN)
-    numtoread = APPN_DATA_LEN;
-  else if (length > 0)
-    numtoread = (unsigned int) length;
-  else
-    numtoread = 0;
-  for (i = 0; i < numtoread; i++)
-    INPUT_BYTE(cinfo, b[i], return FALSE);
-  length -= numtoread;
-
-  /* process it */
-  switch (cinfo->unread_marker) {
-  case M_APP0:
-    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
-    break;
-  case M_APP14:
-    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
-    break;
-  default:
-    /* can't get here unless jpeg_save_markers chooses wrong processor */
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-  }
-
-  /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-
-#ifdef SAVE_MARKERS_SUPPORTED
-
-METHODDEF(boolean)
-save_marker (j_decompress_ptr cinfo)
-/* Save an APPn or COM marker into the marker list */
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
-  unsigned int bytes_read, data_length;
-  JOCTET FAR * data;
-  INT32 length = 0;
-  INPUT_VARS(cinfo);
-
-  if (cur_marker == NULL) {
-    /* begin reading a marker */
-    INPUT_2BYTES(cinfo, length, return FALSE);
-    length -= 2;
-    if (length >= 0) {		/* watch out for bogus length word */
-      /* figure out how much we want to save */
-      unsigned int limit;
-      if (cinfo->unread_marker == (int) M_COM)
-	limit = marker->length_limit_COM;
-      else
-	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
-      if ((unsigned int) length < limit)
-	limit = (unsigned int) length;
-      /* allocate and initialize the marker item */
-      cur_marker = (jpeg_saved_marker_ptr)
-	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				    SIZEOF(struct jpeg_marker_struct) + limit);
-      cur_marker->next = NULL;
-      cur_marker->marker = (UINT8) cinfo->unread_marker;
-      cur_marker->original_length = (unsigned int) length;
-      cur_marker->data_length = limit;
-      /* data area is just beyond the jpeg_marker_struct */
-      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
-      marker->cur_marker = cur_marker;
-      marker->bytes_read = 0;
-      bytes_read = 0;
-      data_length = limit;
-    } else {
-      /* deal with bogus length word */
-      bytes_read = data_length = 0;
-      data = NULL;
-    }
-  } else {
-    /* resume reading a marker */
-    bytes_read = marker->bytes_read;
-    data_length = cur_marker->data_length;
-    data = cur_marker->data + bytes_read;
-  }
-
-  while (bytes_read < data_length) {
-    INPUT_SYNC(cinfo);		/* move the restart point to here */
-    marker->bytes_read = bytes_read;
-    /* If there's not at least one byte in buffer, suspend */
-    MAKE_BYTE_AVAIL(cinfo, return FALSE);
-    /* Copy bytes with reasonable rapidity */
-    while (bytes_read < data_length && bytes_in_buffer > 0) {
-      *data++ = *next_input_byte++;
-      bytes_in_buffer--;
-      bytes_read++;
-    }
-  }
-
-  /* Done reading what we want to read */
-  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
-    /* Add new marker to end of list */
-    if (cinfo->marker_list == NULL) {
-      cinfo->marker_list = cur_marker;
-    } else {
-      jpeg_saved_marker_ptr prev = cinfo->marker_list;
-      while (prev->next != NULL)
-	prev = prev->next;
-      prev->next = cur_marker;
-    }
-    /* Reset pointer & calc remaining data length */
-    data = cur_marker->data;
-    length = cur_marker->original_length - data_length;
-  }
-  /* Reset to initial state for next marker */
-  marker->cur_marker = NULL;
-
-  /* Process the marker if interesting; else just make a generic trace msg */
-  switch (cinfo->unread_marker) {
-  case M_APP0:
-    examine_app0(cinfo, data, data_length, length);
-    break;
-  case M_APP14:
-    examine_app14(cinfo, data, data_length, length);
-    break;
-  default:
-    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
-	     (int) (data_length + length));
-  }
-
-  /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-#endif /* SAVE_MARKERS_SUPPORTED */
-
-
-METHODDEF(boolean)
-skip_variable (j_decompress_ptr cinfo)
-/* Skip over an unknown or uninteresting variable-length marker */
-{
-  INT32 length;
-  INPUT_VARS(cinfo);
-
-  INPUT_2BYTES(cinfo, length, return FALSE);
-  length -= 2;
-  
-  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
-
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
-  if (length > 0)
-    (*cinfo->src->skip_input_data) (cinfo, (long) length);
-
-  return TRUE;
-}
-
-
-/*
- * Find the next JPEG marker, save it in cinfo->unread_marker.
- * Returns FALSE if had to suspend before reaching a marker;
- * in that case cinfo->unread_marker is unchanged.
- *
- * Note that the result might not be a valid marker code,
- * but it will never be 0 or FF.
- */
-
-LOCAL(boolean)
-next_marker (j_decompress_ptr cinfo)
-{
-  int c;
-  INPUT_VARS(cinfo);
-
-  for (;;) {
-    INPUT_BYTE(cinfo, c, return FALSE);
-    /* Skip any non-FF bytes.
-     * This may look a bit inefficient, but it will not occur in a valid file.
-     * We sync after each discarded byte so that a suspending data source
-     * can discard the byte from its buffer.
-     */
-    while (c != 0xFF) {
-      cinfo->marker->discarded_bytes++;
-      INPUT_SYNC(cinfo);
-      INPUT_BYTE(cinfo, c, return FALSE);
-    }
-    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
-     * pad bytes, so don't count them in discarded_bytes.  We assume there
-     * will not be so many consecutive FF bytes as to overflow a suspending
-     * data source's input buffer.
-     */
-    do {
-      INPUT_BYTE(cinfo, c, return FALSE);
-    } while (c == 0xFF);
-    if (c != 0)
-      break;			/* found a valid marker, exit loop */
-    /* Reach here if we found a stuffed-zero data sequence (FF/00).
-     * Discard it and loop back to try again.
-     */
-    cinfo->marker->discarded_bytes += 2;
-    INPUT_SYNC(cinfo);
-  }
-
-  if (cinfo->marker->discarded_bytes != 0) {
-    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
-    cinfo->marker->discarded_bytes = 0;
-  }
-
-  cinfo->unread_marker = c;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-LOCAL(boolean)
-first_marker (j_decompress_ptr cinfo)
-/* Like next_marker, but used to obtain the initial SOI marker. */
-/* For this marker, we do not allow preceding garbage or fill; otherwise,
- * we might well scan an entire input file before realizing it ain't JPEG.
- * If an application wants to process non-JFIF files, it must seek to the
- * SOI before calling the JPEG library.
- */
-{
-  int c, c2;
-  INPUT_VARS(cinfo);
-
-  INPUT_BYTE(cinfo, c, return FALSE);
-  INPUT_BYTE(cinfo, c2, return FALSE);
-  if (c != 0xFF || c2 != (int) M_SOI)
-    ERREXIT2(cinfo, JERR_NO_SOI, c, c2);
-
-  cinfo->unread_marker = c2;
-
-  INPUT_SYNC(cinfo);
-  return TRUE;
-}
-
-
-/*
- * Read markers until SOS or EOI.
- *
- * Returns same codes as are defined for jpeg_consume_input:
- * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
- *
- * Note: This function may return a pseudo SOS marker (with zero
- * component number) for treat by input controller's consume_input.
- * consume_input itself should filter out (skip) the pseudo marker
- * after processing for the caller.
- */
-
-METHODDEF(int)
-read_markers (j_decompress_ptr cinfo)
-{
-  /* Outer loop repeats once for each marker. */
-  for (;;) {
-    /* Collect the marker proper, unless we already did. */
-    /* NB: first_marker() enforces the requirement that SOI appear first. */
-    if (cinfo->unread_marker == 0) {
-      if (! cinfo->marker->saw_SOI) {
-	if (! first_marker(cinfo))
-	  return JPEG_SUSPENDED;
-      } else {
-	if (! next_marker(cinfo))
-	  return JPEG_SUSPENDED;
-      }
-    }
-    /* At this point cinfo->unread_marker contains the marker code and the
-     * input point is just past the marker proper, but before any parameters.
-     * A suspension will cause us to return with this state still true.
-     */
-    switch (cinfo->unread_marker) {
-    case M_SOI:
-      if (! get_soi(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF0:		/* Baseline */
-      if (! get_sof(cinfo, TRUE, FALSE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF1:		/* Extended sequential, Huffman */
-      if (! get_sof(cinfo, FALSE, FALSE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF2:		/* Progressive, Huffman */
-      if (! get_sof(cinfo, FALSE, TRUE, FALSE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF9:		/* Extended sequential, arithmetic */
-      if (! get_sof(cinfo, FALSE, FALSE, TRUE))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_SOF10:		/* Progressive, arithmetic */
-      if (! get_sof(cinfo, FALSE, TRUE, TRUE))
-	return JPEG_SUSPENDED;
-      break;
-
-    /* Currently unsupported SOFn types */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_JPG:			/* Reserved for JPEG extensions */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
-      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
-      break;
-
-    case M_SOS:
-      if (! get_sos(cinfo))
-	return JPEG_SUSPENDED;
-      cinfo->unread_marker = 0;	/* processed the marker */
-      return JPEG_REACHED_SOS;
-
-    case M_EOI:
-      TRACEMS(cinfo, 1, JTRC_EOI);
-      cinfo->unread_marker = 0;	/* processed the marker */
-      return JPEG_REACHED_EOI;
-
-    case M_DAC:
-      if (! get_dac(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_DHT:
-      if (! get_dht(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_DQT:
-      if (! get_dqt(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_DRI:
-      if (! get_dri(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_JPG8:
-      if (! get_lse(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_APP0:
-    case M_APP1:
-    case M_APP2:
-    case M_APP3:
-    case M_APP4:
-    case M_APP5:
-    case M_APP6:
-    case M_APP7:
-    case M_APP8:
-    case M_APP9:
-    case M_APP10:
-    case M_APP11:
-    case M_APP12:
-    case M_APP13:
-    case M_APP14:
-    case M_APP15:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
-		cinfo->unread_marker - (int) M_APP0]) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_COM:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    case M_RST0:		/* these are all parameterless */
-    case M_RST1:
-    case M_RST2:
-    case M_RST3:
-    case M_RST4:
-    case M_RST5:
-    case M_RST6:
-    case M_RST7:
-    case M_TEM:
-      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
-      break;
-
-    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
-      if (! skip_variable(cinfo))
-	return JPEG_SUSPENDED;
-      break;
-
-    default:			/* must be DHP, EXP, JPGn, or RESn */
-      /* For now, we treat the reserved markers as fatal errors since they are
-       * likely to be used to signal incompatible JPEG Part 3 extensions.
-       * Once the JPEG 3 version-number marker is well defined, this code
-       * ought to change!
-       */
-      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
-    }
-    /* Successfully processed marker, so reset state variable */
-    cinfo->unread_marker = 0;
-  } /* end loop */
-}
-
-
-/*
- * Read a restart marker, which is expected to appear next in the datastream;
- * if the marker is not there, take appropriate recovery action.
- * Returns FALSE if suspension is required.
- *
- * This is called by the entropy decoder after it has read an appropriate
- * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
- * has already read a marker from the data source.  Under normal conditions
- * cinfo->unread_marker will be reset to 0 before returning; if not reset,
- * it holds a marker which the decoder will be unable to read past.
- */
-
-METHODDEF(boolean)
-read_restart_marker (j_decompress_ptr cinfo)
-{
-  /* Obtain a marker unless we already did. */
-  /* Note that next_marker will complain if it skips any data. */
-  if (cinfo->unread_marker == 0) {
-    if (! next_marker(cinfo))
-      return FALSE;
-  }
-
-  if (cinfo->unread_marker ==
-      ((int) M_RST0 + cinfo->marker->next_restart_num)) {
-    /* Normal case --- swallow the marker and let entropy decoder continue */
-    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
-    cinfo->unread_marker = 0;
-  } else {
-    /* Uh-oh, the restart markers have been messed up. */
-    /* Let the data source manager determine how to resync. */
-    if (! (*cinfo->src->resync_to_restart) (cinfo,
-					    cinfo->marker->next_restart_num))
-      return FALSE;
-  }
-
-  /* Update next-restart state */
-  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
-
-  return TRUE;
-}
-
-
-/*
- * This is the default resync_to_restart method for data source managers
- * to use if they don't have any better approach.  Some data source managers
- * may be able to back up, or may have additional knowledge about the data
- * which permits a more intelligent recovery strategy; such managers would
- * presumably supply their own resync method.
- *
- * read_restart_marker calls resync_to_restart if it finds a marker other than
- * the restart marker it was expecting.  (This code is *not* used unless
- * a nonzero restart interval has been declared.)  cinfo->unread_marker is
- * the marker code actually found (might be anything, except 0 or FF).
- * The desired restart marker number (0..7) is passed as a parameter.
- * This routine is supposed to apply whatever error recovery strategy seems
- * appropriate in order to position the input stream to the next data segment.
- * Note that cinfo->unread_marker is treated as a marker appearing before
- * the current data-source input point; usually it should be reset to zero
- * before returning.
- * Returns FALSE if suspension is required.
- *
- * This implementation is substantially constrained by wanting to treat the
- * input as a data stream; this means we can't back up.  Therefore, we have
- * only the following actions to work with:
- *   1. Simply discard the marker and let the entropy decoder resume at next
- *      byte of file.
- *   2. Read forward until we find another marker, discarding intervening
- *      data.  (In theory we could look ahead within the current bufferload,
- *      without having to discard data if we don't find the desired marker.
- *      This idea is not implemented here, in part because it makes behavior
- *      dependent on buffer size and chance buffer-boundary positions.)
- *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
- *      This will cause the entropy decoder to process an empty data segment,
- *      inserting dummy zeroes, and then we will reprocess the marker.
- *
- * #2 is appropriate if we think the desired marker lies ahead, while #3 is
- * appropriate if the found marker is a future restart marker (indicating
- * that we have missed the desired restart marker, probably because it got
- * corrupted).
- * We apply #2 or #3 if the found marker is a restart marker no more than
- * two counts behind or ahead of the expected one.  We also apply #2 if the
- * found marker is not a legal JPEG marker code (it's certainly bogus data).
- * If the found marker is a restart marker more than 2 counts away, we do #1
- * (too much risk that the marker is erroneous; with luck we will be able to
- * resync at some future point).
- * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
- * overrunning the end of a scan.  An implementation limited to single-scan
- * files might find it better to apply #2 for markers other than EOI, since
- * any other marker would have to be bogus data in that case.
- */
-
-GLOBAL(boolean)
-jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
-{
-  int marker = cinfo->unread_marker;
-  int action = 1;
-  
-  /* Always put up a warning. */
-  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
-  
-  /* Outer loop handles repeated decision after scanning forward. */
-  for (;;) {
-    if (marker < (int) M_SOF0)
-      action = 2;		/* invalid marker */
-    else if (marker < (int) M_RST0 || marker > (int) M_RST7)
-      action = 3;		/* valid non-restart marker */
-    else {
-      if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
-	  marker == ((int) M_RST0 + ((desired+2) & 7)))
-	action = 3;		/* one of the next two expected restarts */
-      else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
-	       marker == ((int) M_RST0 + ((desired-2) & 7)))
-	action = 2;		/* a prior restart, so advance */
-      else
-	action = 1;		/* desired restart or too far away */
-    }
-    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
-    switch (action) {
-    case 1:
-      /* Discard marker and let entropy decoder resume processing. */
-      cinfo->unread_marker = 0;
-      return TRUE;
-    case 2:
-      /* Scan to the next marker, and repeat the decision loop. */
-      if (! next_marker(cinfo))
-	return FALSE;
-      marker = cinfo->unread_marker;
-      break;
-    case 3:
-      /* Return without advancing past this marker. */
-      /* Entropy decoder will be forced to process an empty segment. */
-      return TRUE;
-    }
-  } /* end loop */
-}
-
-
-/*
- * Reset marker processing state to begin a fresh datastream.
- */
-
-METHODDEF(void)
-reset_marker_reader (j_decompress_ptr cinfo)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  cinfo->comp_info = NULL;		/* until allocated by get_sof */
-  cinfo->input_scan_number = 0;		/* no SOS seen yet */
-  cinfo->unread_marker = 0;		/* no pending marker */
-  marker->pub.saw_SOI = FALSE;		/* set internal state too */
-  marker->pub.saw_SOF = FALSE;
-  marker->pub.discarded_bytes = 0;
-  marker->cur_marker = NULL;
-}
-
-
-/*
- * Initialize the marker reader module.
- * This is called only once, when the decompression object is created.
- */
-
-GLOBAL(void)
-jinit_marker_reader (j_decompress_ptr cinfo)
-{
-  my_marker_ptr marker;
-  int i;
-
-  /* Create subobject in permanent pool */
-  marker = (my_marker_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_PERMANENT, SIZEOF(my_marker_reader));
-  cinfo->marker = &marker->pub;
-  /* Initialize public method pointers */
-  marker->pub.reset_marker_reader = reset_marker_reader;
-  marker->pub.read_markers = read_markers;
-  marker->pub.read_restart_marker = read_restart_marker;
-  /* Initialize COM/APPn processing.
-   * By default, we examine and then discard APP0 and APP14,
-   * but simply discard COM and all other APPn.
-   */
-  marker->process_COM = skip_variable;
-  marker->length_limit_COM = 0;
-  for (i = 0; i < 16; i++) {
-    marker->process_APPn[i] = skip_variable;
-    marker->length_limit_APPn[i] = 0;
-  }
-  marker->process_APPn[0] = get_interesting_appn;
-  marker->process_APPn[14] = get_interesting_appn;
-  /* Reset marker processing state */
-  reset_marker_reader(cinfo);
-}
-
-
-/*
- * Control saving of COM and APPn markers into marker_list.
- */
-
-#ifdef SAVE_MARKERS_SUPPORTED
-
-GLOBAL(void)
-jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
-		   unsigned int length_limit)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-  long maxlength;
-  jpeg_marker_parser_method processor;
-
-  /* Length limit mustn't be larger than what we can allocate
-   * (should only be a concern in a 16-bit environment).
-   */
-  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
-  if (((long) length_limit) > maxlength)
-    length_limit = (unsigned int) maxlength;
-
-  /* Choose processor routine to use.
-   * APP0/APP14 have special requirements.
-   */
-  if (length_limit) {
-    processor = save_marker;
-    /* If saving APP0/APP14, save at least enough for our internal use. */
-    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
-      length_limit = APP0_DATA_LEN;
-    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
-      length_limit = APP14_DATA_LEN;
-  } else {
-    processor = skip_variable;
-    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
-    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
-      processor = get_interesting_appn;
-  }
-
-  if (marker_code == (int) M_COM) {
-    marker->process_COM = processor;
-    marker->length_limit_COM = length_limit;
-  } else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
-    marker->process_APPn[marker_code - (int) M_APP0] = processor;
-    marker->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
-  } else
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
-}
-
-#endif /* SAVE_MARKERS_SUPPORTED */
-
-
-/*
- * Install a special processing method for COM or APPn markers.
- */
-
-GLOBAL(void)
-jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
-			   jpeg_marker_parser_method routine)
-{
-  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
-
-  if (marker_code == (int) M_COM)
-    marker->process_COM = routine;
-  else if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
-    marker->process_APPn[marker_code - (int) M_APP0] = routine;
-  else
-    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
-}
diff --git a/cmake/externals/libjpeg/jdmaster.c b/cmake/externals/libjpeg/jdmaster.c
deleted file mode 100644
index 3070b7bb4..000000000
--- a/cmake/externals/libjpeg/jdmaster.c
+++ /dev/null
@@ -1,532 +0,0 @@
-/*
- * jdmaster.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains master control logic for the JPEG decompressor.
- * These routines are concerned with selecting the modules to be executed
- * and with determining the number of passes and the work to be done in each
- * pass.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private state */
-
-typedef struct {
-  struct jpeg_decomp_master pub; /* public fields */
-
-  int pass_number;		/* # of passes completed */
-
-  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
-
-  /* Saved references to initialized quantizer modules,
-   * in case we need to switch modes.
-   */
-  struct jpeg_color_quantizer * quantizer_1pass;
-  struct jpeg_color_quantizer * quantizer_2pass;
-} my_decomp_master;
-
-typedef my_decomp_master * my_master_ptr;
-
-
-/*
- * Determine whether merged upsample/color conversion should be used.
- * CRUCIAL: this must match the actual capabilities of jdmerge.c!
- */
-
-LOCAL(boolean)
-use_merged_upsample (j_decompress_ptr cinfo)
-{
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-  /* Merging is the equivalent of plain box-filter upsampling. */
-  /* The following condition is only needed if fancy shall select
-   * a different upsampling method.  In our current implementation
-   * fancy only affects the DCT scaling, thus we can use fancy
-   * upsampling and merged upsample simultaneously, in particular
-   * with scaled DCT sizes larger than the default DCTSIZE.
-   */
-#if 0
-  if (cinfo->do_fancy_upsampling)
-    return FALSE;
-#endif
-  if (cinfo->CCIR601_sampling)
-    return FALSE;
-  /* jdmerge.c only supports YCC=>RGB color conversion */
-  if ((cinfo->jpeg_color_space != JCS_YCbCr &&
-       cinfo->jpeg_color_space != JCS_BG_YCC) ||
-      cinfo->num_components != 3 ||
-      cinfo->out_color_space != JCS_RGB ||
-      cinfo->out_color_components != RGB_PIXELSIZE ||
-      cinfo->color_transform)
-    return FALSE;
-  /* and it only handles 2h1v or 2h2v sampling ratios */
-  if (cinfo->comp_info[0].h_samp_factor != 2 ||
-      cinfo->comp_info[1].h_samp_factor != 1 ||
-      cinfo->comp_info[2].h_samp_factor != 1 ||
-      cinfo->comp_info[0].v_samp_factor >  2 ||
-      cinfo->comp_info[1].v_samp_factor != 1 ||
-      cinfo->comp_info[2].v_samp_factor != 1)
-    return FALSE;
-  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
-  if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size ||
-      cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
-      cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size ||
-      cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size)
-    return FALSE;
-  /* ??? also need to test for upsample-time rescaling, when & if supported */
-  return TRUE;			/* by golly, it'll work... */
-#else
-  return FALSE;
-#endif
-}
-
-
-/*
- * Compute output image dimensions and related values.
- * NOTE: this is exported for possible use by application.
- * Hence it mustn't do anything that can't be done twice.
- * Also note that it may be called before the master module is initialized!
- */
-
-GLOBAL(void)
-jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
-/* Do computations that are needed before master selection phase.
- * This function is used for full decompression.
- */
-{
-  int ci, i;
-  jpeg_component_info *compptr;
-
-  /* Prevent application from calling me at wrong times */
-  if (cinfo->global_state != DSTATE_READY)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  /* Compute core output image dimensions and DCT scaling choices. */
-  jpeg_core_output_dimensions(cinfo);
-
-#ifdef IDCT_SCALING_SUPPORTED
-
-  /* In selecting the actual DCT scaling for each component, we try to
-   * scale up the chroma components via IDCT scaling rather than upsampling.
-   * This saves time if the upsampler gets to use 1:1 scaling.
-   * Note this code adapts subsampling ratios which are powers of 2.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    int ssize = 1;
-    if (! cinfo->raw_data_out)
-      while (cinfo->min_DCT_h_scaled_size * ssize <=
-	     (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-    compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize;
-    ssize = 1;
-    if (! cinfo->raw_data_out)
-      while (cinfo->min_DCT_v_scaled_size * ssize <=
-	     (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) &&
-	     (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) ==
-	     0) {
-	ssize = ssize * 2;
-      }
-    compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize;
-
-    /* We don't support IDCT ratios larger than 2. */
-    if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2)
-	compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2;
-    else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2)
-	compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2;
-
-    /* Recompute downsampled dimensions of components;
-     * application needs to know these if using raw downsampled data.
-     */
-    /* Size in samples, after IDCT scaling */
-    compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width *
-		    (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * cinfo->block_size));
-    compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height *
-		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-  }
-
-#endif /* IDCT_SCALING_SUPPORTED */
-
-  /* Report number of components in selected colorspace. */
-  /* This should correspond to the actual code in the color conversion module. */
-  switch (cinfo->out_color_space) {
-  case JCS_GRAYSCALE:
-    cinfo->out_color_components = 1;
-    break;
-  case JCS_RGB:
-  case JCS_BG_RGB:
-    cinfo->out_color_components = RGB_PIXELSIZE;
-    break;
-  default:	/* YCCK <=> CMYK conversion or same colorspace as in file */
-    i = 0;
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++)
-      if (compptr->component_needed)
-	i++;	/* count output color components */
-    cinfo->out_color_components = i;
-  }
-  cinfo->output_components = (cinfo->quantize_colors ? 1 :
-			      cinfo->out_color_components);
-
-  /* See if upsampler will want to emit more than one row at a time */
-  if (use_merged_upsample(cinfo))
-    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
-  else
-    cinfo->rec_outbuf_height = 1;
-}
-
-
-/*
- * Several decompression processes need to range-limit values to the range
- * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
- * due to noise introduced by quantization, roundoff error, etc.  These
- * processes are inner loops and need to be as fast as possible.  On most
- * machines, particularly CPUs with pipelines or instruction prefetch,
- * a (subscript-check-less) C table lookup
- *		x = sample_range_limit[x];
- * is faster than explicit tests
- *		if (x < 0)  x = 0;
- *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
- * These processes all use a common table prepared by the routine below.
- *
- * For most steps we can mathematically guarantee that the initial value
- * of x is within 2*(MAXJSAMPLE+1) of the legal range, so a table running
- * from -2*(MAXJSAMPLE+1) to 3*MAXJSAMPLE+2 is sufficient.  But for the
- * initial limiting step (just after the IDCT), a wildly out-of-range value
- * is possible if the input data is corrupt.  To avoid any chance of indexing
- * off the end of memory and getting a bad-pointer trap, we perform the
- * post-IDCT limiting thus:
- *		x = (sample_range_limit - SUBSET)[(x + CENTER) & MASK];
- * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
- * samples.  Under normal circumstances this is more than enough range and
- * a correct output will be generated; with bogus input data the mask will
- * cause wraparound, and we will safely generate a bogus-but-in-range output.
- * For the post-IDCT step, we want to convert the data from signed to unsigned
- * representation by adding CENTERJSAMPLE at the same time that we limit it.
- * This is accomplished with SUBSET = CENTER - CENTERJSAMPLE.
- *
- * Note that the table is allocated in near data space on PCs; it's small
- * enough and used often enough to justify this.
- */
-
-LOCAL(void)
-prepare_range_limit_table (j_decompress_ptr cinfo)
-/* Allocate and fill in the sample_range_limit table */
-{
-  JSAMPLE * table;
-  int i;
-
-  table = (JSAMPLE *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo,
-    JPOOL_IMAGE, (RANGE_CENTER * 2 + MAXJSAMPLE + 1) * SIZEOF(JSAMPLE));
-  /* First segment of range limit table: limit[x] = 0 for x < 0 */
-  MEMZERO(table, RANGE_CENTER * SIZEOF(JSAMPLE));
-  table += RANGE_CENTER;	/* allow negative subscripts of table */
-  cinfo->sample_range_limit = table;
-  /* Main part of range limit table: limit[x] = x */
-  for (i = 0; i <= MAXJSAMPLE; i++)
-    table[i] = (JSAMPLE) i;
-  /* End of range limit table: limit[x] = MAXJSAMPLE for x > MAXJSAMPLE */
-  for (; i <=  MAXJSAMPLE + RANGE_CENTER; i++)
-    table[i] = MAXJSAMPLE;
-}
-
-
-/*
- * Master selection of decompression modules.
- * This is done once at jpeg_start_decompress time.  We determine
- * which modules will be used and give them appropriate initialization calls.
- * We also initialize the decompressor input side to begin consuming data.
- *
- * Since jpeg_read_header has finished, we know what is in the SOF
- * and (first) SOS markers.  We also have all the application parameter
- * settings.
- */
-
-LOCAL(void)
-master_selection (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-  boolean use_c_buffer;
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
-    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
-
-  /* Initialize dimensions and other stuff */
-  jpeg_calc_output_dimensions(cinfo);
-  prepare_range_limit_table(cinfo);
-
-  /* Sanity check on image dimensions */
-  if (cinfo->output_height <= 0 || cinfo->output_width <= 0 ||
-      cinfo->out_color_components <= 0)
-    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
-
-  /* Width of an output scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* Initialize my private state */
-  master->pass_number = 0;
-  master->using_merged_upsample = use_merged_upsample(cinfo);
-
-  /* Color quantizer selection */
-  master->quantizer_1pass = NULL;
-  master->quantizer_2pass = NULL;
-  /* No mode changes if not using buffered-image mode. */
-  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
-    cinfo->enable_1pass_quant = FALSE;
-    cinfo->enable_external_quant = FALSE;
-    cinfo->enable_2pass_quant = FALSE;
-  }
-  if (cinfo->quantize_colors) {
-    if (cinfo->raw_data_out)
-      ERREXIT(cinfo, JERR_NOTIMPL);
-    /* 2-pass quantizer only works in 3-component color space. */
-    if (cinfo->out_color_components != 3) {
-      cinfo->enable_1pass_quant = TRUE;
-      cinfo->enable_external_quant = FALSE;
-      cinfo->enable_2pass_quant = FALSE;
-      cinfo->colormap = NULL;
-    } else if (cinfo->colormap != NULL) {
-      cinfo->enable_external_quant = TRUE;
-    } else if (cinfo->two_pass_quantize) {
-      cinfo->enable_2pass_quant = TRUE;
-    } else {
-      cinfo->enable_1pass_quant = TRUE;
-    }
-
-    if (cinfo->enable_1pass_quant) {
-#ifdef QUANT_1PASS_SUPPORTED
-      jinit_1pass_quantizer(cinfo);
-      master->quantizer_1pass = cinfo->cquantize;
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    }
-
-    /* We use the 2-pass code to map to external colormaps. */
-    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
-#ifdef QUANT_2PASS_SUPPORTED
-      jinit_2pass_quantizer(cinfo);
-      master->quantizer_2pass = cinfo->cquantize;
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    }
-    /* If both quantizers are initialized, the 2-pass one is left active;
-     * this is necessary for starting with quantization to an external map.
-     */
-  }
-
-  /* Post-processing: in particular, color conversion first */
-  if (! cinfo->raw_data_out) {
-    if (master->using_merged_upsample) {
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-      jinit_merged_upsampler(cinfo); /* does color conversion too */
-#else
-      ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif
-    } else {
-      jinit_color_deconverter(cinfo);
-      jinit_upsampler(cinfo);
-    }
-    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
-  }
-  /* Inverse DCT */
-  jinit_inverse_dct(cinfo);
-  /* Entropy decoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_decoder(cinfo);
-  else {
-    jinit_huff_decoder(cinfo);
-  }
-
-  /* Initialize principal buffer controllers. */
-  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
-  jinit_d_coef_controller(cinfo, use_c_buffer);
-
-  if (! cinfo->raw_data_out)
-    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Initialize input side of decompressor to consume first scan. */
-  (*cinfo->inputctl->start_input_pass) (cinfo);
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-  /* If jpeg_start_decompress will read the whole file, initialize
-   * progress monitoring appropriately.  The input step is counted
-   * as one pass.
-   */
-  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
-      cinfo->inputctl->has_multiple_scans) {
-    int nscans;
-    /* Estimate number of scans to set pass_limit. */
-    if (cinfo->progressive_mode) {
-      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
-      nscans = 2 + 3 * cinfo->num_components;
-    } else {
-      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
-      nscans = cinfo->num_components;
-    }
-    cinfo->progress->pass_counter = 0L;
-    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
-    cinfo->progress->completed_passes = 0;
-    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
-    /* Count the input pass as done */
-    master->pass_number++;
-  }
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-}
-
-
-/*
- * Per-pass setup.
- * This is called at the beginning of each output pass.  We determine which
- * modules will be active during this pass and give them appropriate
- * start_pass calls.  We also set is_dummy_pass to indicate whether this
- * is a "real" output pass or a dummy pass for color quantization.
- * (In the latter case, jdapistd.c will crank the pass to completion.)
- */
-
-METHODDEF(void)
-prepare_for_output_pass (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  if (master->pub.is_dummy_pass) {
-#ifdef QUANT_2PASS_SUPPORTED
-    /* Final pass of 2-pass quantization */
-    master->pub.is_dummy_pass = FALSE;
-    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
-    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
-    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
-#else
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-#endif /* QUANT_2PASS_SUPPORTED */
-  } else {
-    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
-      /* Select new quantization method */
-      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
-	cinfo->cquantize = master->quantizer_2pass;
-	master->pub.is_dummy_pass = TRUE;
-      } else if (cinfo->enable_1pass_quant) {
-	cinfo->cquantize = master->quantizer_1pass;
-      } else {
-	ERREXIT(cinfo, JERR_MODE_CHANGE);
-      }
-    }
-    (*cinfo->idct->start_pass) (cinfo);
-    (*cinfo->coef->start_output_pass) (cinfo);
-    if (! cinfo->raw_data_out) {
-      if (! master->using_merged_upsample)
-	(*cinfo->cconvert->start_pass) (cinfo);
-      (*cinfo->upsample->start_pass) (cinfo);
-      if (cinfo->quantize_colors)
-	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
-      (*cinfo->post->start_pass) (cinfo,
-	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
-      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
-    }
-  }
-
-  /* Set up progress monitor's pass info if present */
-  if (cinfo->progress != NULL) {
-    cinfo->progress->completed_passes = master->pass_number;
-    cinfo->progress->total_passes = master->pass_number +
-				    (master->pub.is_dummy_pass ? 2 : 1);
-    /* In buffered-image mode, we assume one more output pass if EOI not
-     * yet reached, but no more passes if EOI has been reached.
-     */
-    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
-      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
-    }
-  }
-}
-
-
-/*
- * Finish up at end of an output pass.
- */
-
-METHODDEF(void)
-finish_output_pass (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  if (cinfo->quantize_colors)
-    (*cinfo->cquantize->finish_pass) (cinfo);
-  master->pass_number++;
-}
-
-
-#ifdef D_MULTISCAN_FILES_SUPPORTED
-
-/*
- * Switch to a new external colormap between output passes.
- */
-
-GLOBAL(void)
-jpeg_new_colormap (j_decompress_ptr cinfo)
-{
-  my_master_ptr master = (my_master_ptr) cinfo->master;
-
-  /* Prevent application from calling me at wrong times */
-  if (cinfo->global_state != DSTATE_BUFIMAGE)
-    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-
-  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
-      cinfo->colormap != NULL) {
-    /* Select 2-pass quantizer for external colormap use */
-    cinfo->cquantize = master->quantizer_2pass;
-    /* Notify quantizer of colormap change */
-    (*cinfo->cquantize->new_color_map) (cinfo);
-    master->pub.is_dummy_pass = FALSE; /* just in case */
-  } else
-    ERREXIT(cinfo, JERR_MODE_CHANGE);
-}
-
-#endif /* D_MULTISCAN_FILES_SUPPORTED */
-
-
-/*
- * Initialize master decompression control and select active modules.
- * This is performed at the start of jpeg_start_decompress.
- */
-
-GLOBAL(void)
-jinit_master_decompress (j_decompress_ptr cinfo)
-{
-  my_master_ptr master;
-
-  master = (my_master_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_decomp_master));
-  cinfo->master = &master->pub;
-  master->pub.prepare_for_output_pass = prepare_for_output_pass;
-  master->pub.finish_output_pass = finish_output_pass;
-
-  master->pub.is_dummy_pass = FALSE;
-
-  master_selection(cinfo);
-}
diff --git a/cmake/externals/libjpeg/jdmerge.c b/cmake/externals/libjpeg/jdmerge.c
deleted file mode 100644
index 0d16821be..000000000
--- a/cmake/externals/libjpeg/jdmerge.c
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * jdmerge.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2013-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains code for merged upsampling/color conversion.
- *
- * This file combines functions from jdsample.c and jdcolor.c;
- * read those files first to understand what's going on.
- *
- * When the chroma components are to be upsampled by simple replication
- * (ie, box filtering), we can save some work in color conversion by
- * calculating all the output pixels corresponding to a pair of chroma
- * samples at one time.  In the conversion equations
- *	R = Y           + K1 * Cr
- *	G = Y + K2 * Cb + K3 * Cr
- *	B = Y + K4 * Cb
- * only the Y term varies among the group of pixels corresponding to a pair
- * of chroma samples, so the rest of the terms can be calculated just once.
- * At typical sampling ratios, this eliminates half or three-quarters
- * of the multiplications needed for color conversion.
- *
- * This file currently provides implementations for the following cases:
- *	YCC => RGB color conversion only (YCbCr or BG_YCC).
- *	Sampling ratios of 2h1v or 2h2v.
- *	No scaling needed at upsample time.
- *	Corner-aligned (non-CCIR601) sampling alignment.
- * Other special cases could be added, but in most applications these
- * are the only common cases.  (For uncommon cases we fall back on
- * the more general code in jdsample.c and jdcolor.c.)
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef UPSAMPLE_MERGING_SUPPORTED
-
-
-#if RANGE_BITS < 2
-  /* Deliberate syntax err */
-  Sorry, this code requires 2 or more range extension bits.
-#endif
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
-
-  /* Pointer to routine to do actual upsampling/conversion of one row group */
-  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-			   JSAMPARRAY output_buf));
-
-  /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
-
-  /* For 2:1 vertical sampling, we produce two output rows at a time.
-   * We need a "spare" row buffer to hold the second output row if the
-   * application provides just a one-row buffer; we also use the spare
-   * to discard the dummy last row if the image height is odd.
-   */
-  JSAMPROW spare_row;
-  boolean spare_full;		/* T if spare buffer is occupied */
-
-  JDIMENSION out_row_width;	/* samples per output row */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
-} my_upsampler;
-
-typedef my_upsampler * my_upsample_ptr;
-
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
-
-
-/*
- * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
- * This is taken directly from jdcolor.c; see that file for more info.
- */
-
-LOCAL(void)
-build_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Normal case, sYCC */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  upsample->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.402 * x */
-    upsample->Cr_r_tab[i] = (int) DESCALE(FIX(1.402) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.772 * x */
-    upsample->Cb_b_tab[i] = (int) DESCALE(FIX(1.772) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.714136286 * x */
-    upsample->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
-    /* Cb=>G value is scaled-up -0.344136286 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    upsample->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
-  }
-}
-
-
-LOCAL(void)
-build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
-/* Wide gamut case, bg-sYCC */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int i;
-  INT32 x;
-  SHIFT_TEMPS
-
-  upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
-  upsample->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-  upsample->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
-
-  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
-    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
-    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 2.804 * x */
-    upsample->Cr_r_tab[i] = (int) DESCALE(FIX(2.804) * x, SCALEBITS);
-    /* Cb=>B value is nearest int to 3.544 * x */
-    upsample->Cb_b_tab[i] = (int) DESCALE(FIX(3.544) * x, SCALEBITS);
-    /* Cr=>G value is scaled-up -1.428272572 * x */
-    upsample->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
-    /* Cb=>G value is scaled-up -0.688272572 * x */
-    /* We also add in ONE_HALF so that need not do it in inner loop */
-    upsample->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
-  }
-}
-
-
-/*
- * Initialize for an upsampling pass.
- */
-
-METHODDEF(void)
-start_pass_merged_upsample (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Mark the spare buffer empty */
-  upsample->spare_full = FALSE;
-  /* Initialize total-height counter for detecting bottom of image */
-  upsample->rows_to_go = cinfo->output_height;
-}
-
-
-/*
- * Control routine to do upsampling (and color conversion).
- *
- * The control routine just handles the row buffering considerations.
- */
-
-METHODDEF(void)
-merged_2v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-/* 2:1 vertical sampling case: may need a spare row. */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  JSAMPROW work_ptrs[2];
-  JDIMENSION num_rows;		/* number of rows returned to caller */
-
-  if (upsample->spare_full) {
-    /* If we have a spare row saved from a previous cycle, just return it. */
-    jcopy_sample_rows(& upsample->spare_row, output_buf + *out_row_ctr,
-		      1, upsample->out_row_width);
-    num_rows = 1;
-    upsample->spare_full = FALSE;
-  } else {
-    /* Figure number of rows to return to caller. */
-    num_rows = 2;
-    /* Not more than the distance to the end of the image. */
-    if (num_rows > upsample->rows_to_go)
-      num_rows = upsample->rows_to_go;
-    /* And not more than what the client can accept: */
-    out_rows_avail -= *out_row_ctr;
-    if (num_rows > out_rows_avail)
-      num_rows = out_rows_avail;
-    /* Create output pointer array for upsampler. */
-    work_ptrs[0] = output_buf[*out_row_ctr];
-    if (num_rows > 1) {
-      work_ptrs[1] = output_buf[*out_row_ctr + 1];
-    } else {
-      work_ptrs[1] = upsample->spare_row;
-      upsample->spare_full = TRUE;
-    }
-    /* Now do the upsampling. */
-    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
-  }
-
-  /* Adjust counts */
-  *out_row_ctr += num_rows;
-  upsample->rows_to_go -= num_rows;
-  /* When the buffer is emptied, declare this input row group consumed */
-  if (! upsample->spare_full)
-    (*in_row_group_ctr)++;
-}
-
-
-METHODDEF(void)
-merged_1v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-/* 1:1 vertical sampling case: much easier, never need a spare row. */
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Just do the upsampling. */
-  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
-			 output_buf + *out_row_ctr);
-  /* Adjust counts */
-  (*out_row_ctr)++;
-  (*in_row_group_ctr)++;
-}
-
-
-/*
- * These are the routines invoked by the control routines to do
- * the actual upsampling/conversion.  One row group is processed per call.
- *
- * Note: since we may be writing directly into application-supplied buffers,
- * we have to be honest about the output width; we can't assume the buffer
- * has been rounded up to an even width.
- */
-
-
-/*
- * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
- */
-
-METHODDEF(void)
-h2v1_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr0 = input_buf[0][in_row_group_ctr];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr = output_buf[0];
-  /* Loop for each pair of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue  = Cbbtab[cb];
-    cred   = Crrtab[cr];
-    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED]   = range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE]  = range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED]   = range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE]  = range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    y  = GETJSAMPLE(*inptr0);
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
-    outptr[RGB_GREEN] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
-    outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
-  }
-}
-
-
-/*
- * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
- */
-
-METHODDEF(void)
-h2v2_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr00 = input_buf[0][in_row_group_ctr*2];
-  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr0 = output_buf[0];
-  outptr1 = output_buf[1];
-  /* Loop for each group of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue  = Cbbtab[cb];
-    cred   = Crrtab[cr];
-    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED]   = range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE]  = range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED]   = range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE]  = range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED]   = range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE]  = range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED]   = range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE]  = range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue  = Cbbtab[cb];
-    cred   = Crrtab[cr];
-    y  = GETJSAMPLE(*inptr00);
-    outptr0[RGB_RED]   = range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE]  = range_limit[y + cblue];
-    y  = GETJSAMPLE(*inptr01);
-    outptr1[RGB_RED]   = range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE]  = range_limit[y + cblue];
-  }
-}
-
-
-/*
- * Module initialization routine for merged upsampling/color conversion.
- *
- * NB: this is called under the conditions determined by use_merged_upsample()
- * in jdmaster.c.  That routine MUST correspond to the actual capabilities
- * of this module; no safety checks are made here.
- */
-
-GLOBAL(void)
-jinit_merged_upsampler (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample;
-
-  upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_upsampler));
-  cinfo->upsample = &upsample->pub;
-  upsample->pub.start_pass = start_pass_merged_upsample;
-  upsample->pub.need_context_rows = FALSE;
-
-  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
-
-  if (cinfo->max_v_samp_factor == 2) {
-    upsample->pub.upsample = merged_2v_upsample;
-    upsample->upmethod = h2v2_merged_upsample;
-    /* Allocate a spare row buffer */
-    upsample->spare_row = (JSAMPROW) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (size_t) upsample->out_row_width * SIZEOF(JSAMPLE));
-  } else {
-    upsample->pub.upsample = merged_1v_upsample;
-    upsample->upmethod = h2v1_merged_upsample;
-    /* No spare row needed */
-    upsample->spare_row = NULL;
-  }
-
-  if (cinfo->jpeg_color_space == JCS_BG_YCC)
-    build_bg_ycc_rgb_table(cinfo);
-  else
-    build_ycc_rgb_table(cinfo);
-}
-
-#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jdpostct.c b/cmake/externals/libjpeg/jdpostct.c
deleted file mode 100644
index 571563d72..000000000
--- a/cmake/externals/libjpeg/jdpostct.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * jdpostct.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the decompression postprocessing controller.
- * This controller manages the upsampling, color conversion, and color
- * quantization/reduction steps; specifically, it controls the buffering
- * between upsample/color conversion and color quantization/reduction.
- *
- * If no color quantization/reduction is required, then this module has no
- * work to do, and it just hands off to the upsample/color conversion code.
- * An integrated upsample/convert/quantize process would replace this module
- * entirely.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Private buffer controller object */
-
-typedef struct {
-  struct jpeg_d_post_controller pub; /* public fields */
-
-  /* Color quantization source buffer: this holds output data from
-   * the upsample/color conversion step to be passed to the quantizer.
-   * For two-pass color quantization, we need a full-image buffer;
-   * for one-pass operation, a strip buffer is sufficient.
-   */
-  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
-  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
-  JDIMENSION strip_height;	/* buffer size in rows */
-  /* for two-pass mode only: */
-  JDIMENSION starting_row;	/* row # of first row in current strip */
-  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
-} my_post_controller;
-
-typedef my_post_controller * my_post_ptr;
-
-
-/* Forward declarations */
-METHODDEF(void) post_process_1pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-#ifdef QUANT_2PASS_SUPPORTED
-METHODDEF(void) post_process_prepass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-METHODDEF(void) post_process_2pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
-#endif
-
-
-/*
- * Initialize for a processing pass.
- */
-
-METHODDEF(void)
-start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-
-  switch (pass_mode) {
-  case JBUF_PASS_THRU:
-    if (cinfo->quantize_colors) {
-      /* Single-pass processing with color quantization. */
-      post->pub.post_process_data = post_process_1pass;
-      /* We could be doing buffered-image output before starting a 2-pass
-       * color quantization; in that case, jinit_d_post_controller did not
-       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
-       */
-      if (post->buffer == NULL) {
-	post->buffer = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, post->whole_image,
-	   (JDIMENSION) 0, post->strip_height, TRUE);
-      }
-    } else {
-      /* For single-pass processing without color quantization,
-       * I have no work to do; just call the upsampler directly.
-       */
-      post->pub.post_process_data = cinfo->upsample->upsample;
-    }
-    break;
-#ifdef QUANT_2PASS_SUPPORTED
-  case JBUF_SAVE_AND_PASS:
-    /* First pass of 2-pass quantization */
-    if (post->whole_image == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_prepass;
-    break;
-  case JBUF_CRANK_DEST:
-    /* Second pass of 2-pass quantization */
-    if (post->whole_image == NULL)
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    post->pub.post_process_data = post_process_2pass;
-    break;
-#endif /* QUANT_2PASS_SUPPORTED */
-  default:
-    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    break;
-  }
-  post->starting_row = post->next_row = 0;
-}
-
-
-/*
- * Process some data in the one-pass (strip buffer) case.
- * This is used for color precision reduction as well as one-pass quantization.
- */
-
-METHODDEF(void)
-post_process_1pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION num_rows, max_rows;
-
-  /* Fill the buffer, but not more than what we can dump out in one go. */
-  /* Note we rely on the upsampler to detect bottom of image. */
-  max_rows = out_rows_avail - *out_row_ctr;
-  if (max_rows > post->strip_height)
-    max_rows = post->strip_height;
-  num_rows = 0;
-  (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &num_rows, max_rows);
-  /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
-  *out_row_ctr += num_rows;
-}
-
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-/*
- * Process some data in the first pass of 2-pass quantization.
- */
-
-METHODDEF(void)
-post_process_prepass (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		      JDIMENSION in_row_groups_avail,
-		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		      JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION old_next_row, num_rows;
-
-  /* Reposition virtual buffer if at start of strip. */
-  if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, TRUE);
-  }
-
-  /* Upsample some data (up to a strip height's worth). */
-  old_next_row = post->next_row;
-  (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &post->next_row, post->strip_height);
-
-  /* Allow quantizer to scan new data.  No data is emitted, */
-  /* but we advance out_row_ctr so outer loop can tell when we're done. */
-  if (post->next_row > old_next_row) {
-    num_rows = post->next_row - old_next_row;
-    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
-					 (JSAMPARRAY) NULL, (int) num_rows);
-    *out_row_ctr += num_rows;
-  }
-
-  /* Advance if we filled the strip. */
-  if (post->next_row >= post->strip_height) {
-    post->starting_row += post->strip_height;
-    post->next_row = 0;
-  }
-}
-
-
-/*
- * Process some data in the second pass of 2-pass quantization.
- */
-
-METHODDEF(void)
-post_process_2pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
-{
-  my_post_ptr post = (my_post_ptr) cinfo->post;
-  JDIMENSION num_rows, max_rows;
-
-  /* Reposition virtual buffer if at start of strip. */
-  if (post->next_row == 0) {
-    post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, FALSE);
-  }
-
-  /* Determine number of rows to emit. */
-  num_rows = post->strip_height - post->next_row; /* available in strip */
-  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
-  if (num_rows > max_rows)
-    num_rows = max_rows;
-  /* We have to check bottom of image here, can't depend on upsampler. */
-  max_rows = cinfo->output_height - post->starting_row;
-  if (num_rows > max_rows)
-    num_rows = max_rows;
-
-  /* Quantize and emit data. */
-  (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer + post->next_row, output_buf + *out_row_ctr,
-		(int) num_rows);
-  *out_row_ctr += num_rows;
-
-  /* Advance if we filled the strip. */
-  post->next_row += num_rows;
-  if (post->next_row >= post->strip_height) {
-    post->starting_row += post->strip_height;
-    post->next_row = 0;
-  }
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
-
-
-/*
- * Initialize postprocessing controller.
- */
-
-GLOBAL(void)
-jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
-{
-  my_post_ptr post;
-
-  post = (my_post_ptr)
-    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_post_controller));
-  cinfo->post = (struct jpeg_d_post_controller *) post;
-  post->pub.start_pass = start_pass_dpost;
-  post->whole_image = NULL;	/* flag for no virtual arrays */
-  post->buffer = NULL;		/* flag for no strip buffer */
-
-  /* Create the quantization buffer, if needed */
-  if (cinfo->quantize_colors) {
-    /* The buffer strip height is max_v_samp_factor, which is typically
-     * an efficient number of rows for upsampling to return.
-     * (In the presence of output rescaling, we might want to be smarter?)
-     */
-    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
-    if (need_full_buffer) {
-      /* Two-pass color quantization: need full-image storage. */
-      /* We round up the number of rows to a multiple of the strip height. */
-#ifdef QUANT_2PASS_SUPPORTED
-      post->whole_image = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 (JDIMENSION) jround_up((long) cinfo->output_height,
-				(long) post->strip_height),
-	 post->strip_height);
-#else
-      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-#endif /* QUANT_2PASS_SUPPORTED */
-    } else {
-      /* One-pass color quantization: just make a strip buffer. */
-      post->buffer = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 post->strip_height);
-    }
-  }
-}
diff --git a/cmake/externals/libjpeg/jdsample.c b/cmake/externals/libjpeg/jdsample.c
deleted file mode 100644
index 15afeafe3..000000000
--- a/cmake/externals/libjpeg/jdsample.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * jdsample.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2002-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains upsampling routines.
- *
- * Upsampling input data is counted in "row groups".  A row group
- * is defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size)
- * sample rows of each component.  Upsampling will normally produce
- * max_v_samp_factor pixel rows from each row group (but this could vary
- * if the upsampler is applying a scale factor of its own).
- *
- * An excellent reference for image resampling is
- *   Digital Image Warping, George Wolberg, 1990.
- *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Pointer to routine to upsample a single component */
-typedef JMETHOD(void, upsample1_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr));
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
-
-  /* Color conversion buffer.  When using separate upsampling and color
-   * conversion steps, this buffer holds one upsampled row group until it
-   * has been color converted and output.
-   * Note: we do not allocate any storage for component(s) which are full-size,
-   * ie do not need rescaling.  The corresponding entry of color_buf[] is
-   * simply set to point to the input data array, thereby avoiding copying.
-   */
-  JSAMPARRAY color_buf[MAX_COMPONENTS];
-
-  /* Per-component upsampling method pointers */
-  upsample1_ptr methods[MAX_COMPONENTS];
-
-  int next_row_out;		/* counts rows emitted from color_buf */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
-
-  /* Height of an input row group for each component. */
-  int rowgroup_height[MAX_COMPONENTS];
-
-  /* These arrays save pixel expansion factors so that int_expand need not
-   * recompute them each time.  They are unused for other upsampling methods.
-   */
-  UINT8 h_expand[MAX_COMPONENTS];
-  UINT8 v_expand[MAX_COMPONENTS];
-} my_upsampler;
-
-typedef my_upsampler * my_upsample_ptr;
-
-
-/*
- * Initialize for an upsampling pass.
- */
-
-METHODDEF(void)
-start_pass_upsample (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-
-  /* Mark the conversion buffer empty */
-  upsample->next_row_out = cinfo->max_v_samp_factor;
-  /* Initialize total-height counter for detecting bottom of image */
-  upsample->rows_to_go = cinfo->output_height;
-}
-
-
-/*
- * Control routine to do upsampling (and color conversion).
- *
- * In this version we upsample each component independently.
- * We upsample one row group into the conversion buffer, then apply
- * color conversion a row at a time.
- */
-
-METHODDEF(void)
-sep_upsample (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	      JDIMENSION in_row_groups_avail,
-	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	      JDIMENSION out_rows_avail)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  int ci;
-  jpeg_component_info * compptr;
-  JDIMENSION num_rows;
-
-  /* Fill the conversion buffer, if it's empty */
-  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      /* Don't bother to upsample an uninteresting component. */
-      if (! compptr->component_needed)
-	continue;
-      /* Invoke per-component upsample method.  Notice we pass a POINTER
-       * to color_buf[ci], so that fullsize_upsample can change it.
-       */
-      (*upsample->methods[ci]) (cinfo, compptr,
-	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
-	upsample->color_buf + ci);
-    }
-    upsample->next_row_out = 0;
-  }
-
-  /* Color-convert and emit rows */
-
-  /* How many we have in the buffer: */
-  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
-  /* Not more than the distance to the end of the image.  Need this test
-   * in case the image height is not a multiple of max_v_samp_factor:
-   */
-  if (num_rows > upsample->rows_to_go) 
-    num_rows = upsample->rows_to_go;
-  /* And not more than what the client can accept: */
-  out_rows_avail -= *out_row_ctr;
-  if (num_rows > out_rows_avail)
-    num_rows = out_rows_avail;
-
-  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
-				     (JDIMENSION) upsample->next_row_out,
-				     output_buf + *out_row_ctr,
-				     (int) num_rows);
-
-  /* Adjust counts */
-  *out_row_ctr += num_rows;
-  upsample->rows_to_go -= num_rows;
-  upsample->next_row_out += num_rows;
-  /* When the buffer is emptied, declare this input row group consumed */
-  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
-    (*in_row_group_ctr)++;
-}
-
-
-/*
- * These are the routines invoked by sep_upsample to upsample pixel values
- * of a single component.  One row group is processed per call.
- */
-
-
-/*
- * For full-size components, we just make color_buf[ci] point at the
- * input buffer, and thus avoid copying any data.  Note that this is
- * safe only because sep_upsample doesn't declare the input row group
- * "consumed" until we are done color converting and emitting it.
- */
-
-METHODDEF(void)
-fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  *output_data_ptr = input_data;
-}
-
-
-/*
- * This version handles any integral sampling ratios.
- * This is not used for typical JPEG files, so it need not be fast.
- * Nor, for that matter, is it particularly accurate: the algorithm is
- * simple replication of the input pixel onto the corresponding output
- * pixels.  The hi-falutin sampling literature refers to this as a
- * "box filter".  A box filter tends to introduce visible artifacts,
- * so if you are actually going to use 3:1 or 4:1 sampling ratios
- * you would be well advised to improve this code.
- */
-
-METHODDEF(void)
-int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	      JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  JSAMPARRAY output_data, output_end;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  register int h;
-  JSAMPROW outend;
-  int h_expand, v_expand;
-
-  h_expand = upsample->h_expand[compptr->component_index];
-  v_expand = upsample->v_expand[compptr->component_index];
-
-  output_data = *output_data_ptr;
-  output_end = output_data + cinfo->max_v_samp_factor;
-  for (; output_data < output_end; output_data += v_expand) {
-    /* Generate one output row with proper horizontal expansion */
-    inptr = *input_data++;
-    outptr = *output_data;
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      for (h = h_expand; h > 0; h--) {
-	*outptr++ = invalue;
-      }
-    }
-    /* Generate any additional output rows by duplicating the first one */
-    if (v_expand > 1) {
-      jcopy_sample_rows(output_data, output_data + 1,
-			v_expand - 1, cinfo->output_width);
-    }
-  }
-}
-
-
-/*
- * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
- * It's still a box filter.
- */
-
-METHODDEF(void)
-h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  JSAMPARRAY output_data = *output_data_ptr;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
-  int outrow;
-
-  for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) {
-    inptr = input_data[outrow];
-    outptr = output_data[outrow];
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      *outptr++ = invalue;
-      *outptr++ = invalue;
-    }
-  }
-}
-
-
-/*
- * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
- * It's still a box filter.
- */
-
-METHODDEF(void)
-h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPIMAGE output_data_ptr)
-{
-  JSAMPARRAY output_data, output_end;
-  register JSAMPROW inptr, outptr;
-  register JSAMPLE invalue;
-  JSAMPROW outend;
-
-  output_data = *output_data_ptr;
-  output_end = output_data + cinfo->max_v_samp_factor;
-  for (; output_data < output_end; output_data += 2) {
-    inptr = *input_data++;
-    outptr = *output_data;
-    outend = outptr + cinfo->output_width;
-    while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
-      *outptr++ = invalue;
-      *outptr++ = invalue;
-    }
-    jcopy_sample_rows(output_data, output_data + 1,
-		      1, cinfo->output_width);
-  }
-}
-
-
-/*
- * Module initialization routine for upsampling.
- */
-
-GLOBAL(void)
-jinit_upsampler (j_decompress_ptr cinfo)
-{
-  my_upsample_ptr upsample;
-  int ci;
-  jpeg_component_info * compptr;
-  int h_in_group, v_in_group, h_out_group, v_out_group;
-
-  upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_upsampler));
-  cinfo->upsample = &upsample->pub;
-  upsample->pub.start_pass = start_pass_upsample;
-  upsample->pub.upsample = sep_upsample;
-  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
-
-  if (cinfo->CCIR601_sampling)	/* this isn't supported */
-    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
-
-  /* Verify we can handle the sampling factors, select per-component methods,
-   * and create storage as needed.
-   */
-  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-       ci++, compptr++) {
-    /* Don't bother to upsample an uninteresting component. */
-    if (! compptr->component_needed)
-      continue;
-    /* Compute size of an "input group" after IDCT scaling.  This many samples
-     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
-     */
-    h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) /
-		 cinfo->min_DCT_h_scaled_size;
-    v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) /
-		 cinfo->min_DCT_v_scaled_size;
-    h_out_group = cinfo->max_h_samp_factor;
-    v_out_group = cinfo->max_v_samp_factor;
-    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
-    if (h_in_group == h_out_group && v_in_group == v_out_group) {
-      /* Fullsize components can be processed without any work. */
-      upsample->methods[ci] = fullsize_upsample;
-      continue;		/* don't need to allocate buffer */
-    }
-    if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) {
-      /* Special case for 2h1v upsampling */
-      upsample->methods[ci] = h2v1_upsample;
-    } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group * 2 == v_out_group) {
-      /* Special case for 2h2v upsampling */
-      upsample->methods[ci] = h2v2_upsample;
-    } else if ((h_out_group % h_in_group) == 0 &&
-	       (v_out_group % v_in_group) == 0) {
-      /* Generic integral-factors upsampling method */
-      upsample->methods[ci] = int_upsample;
-      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
-      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
-    } else
-      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
-    upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (JDIMENSION) jround_up((long) cinfo->output_width,
-			      (long) cinfo->max_h_samp_factor),
-       (JDIMENSION) cinfo->max_v_samp_factor);
-  }
-}
diff --git a/cmake/externals/libjpeg/jdtrans.c b/cmake/externals/libjpeg/jdtrans.c
deleted file mode 100644
index 22dd47fb5..000000000
--- a/cmake/externals/libjpeg/jdtrans.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * jdtrans.c
- *
- * Copyright (C) 1995-1997, Thomas G. Lane.
- * Modified 2000-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains library routines for transcoding decompression,
- * that is, reading raw DCT coefficient arrays from an input JPEG file.
- * The routines in jdapimin.c will also be needed by a transcoder.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/* Forward declarations */
-LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
-
-
-/*
- * Read the coefficient arrays from a JPEG file.
- * jpeg_read_header must be completed before calling this.
- *
- * The entire image is read into a set of virtual coefficient-block arrays,
- * one per component.  The return value is a pointer to the array of
- * virtual-array descriptors.  These can be manipulated directly via the
- * JPEG memory manager, or handed off to jpeg_write_coefficients().
- * To release the memory occupied by the virtual arrays, call
- * jpeg_finish_decompress() when done with the data.
- *
- * An alternative usage is to simply obtain access to the coefficient arrays
- * during a buffered-image-mode decompression operation.  This is allowed
- * after any jpeg_finish_output() call.  The arrays can be accessed until
- * jpeg_finish_decompress() is called.  (Note that any call to the library
- * may reposition the arrays, so don't rely on access_virt_barray() results
- * to stay valid across library calls.)
- *
- * Returns NULL if suspended.  This case need be checked only if
- * a suspending data source is used.
- */
-
-GLOBAL(jvirt_barray_ptr *)
-jpeg_read_coefficients (j_decompress_ptr cinfo)
-{
-  if (cinfo->global_state == DSTATE_READY) {
-    /* First call: initialize active modules */
-    transdecode_master_selection(cinfo);
-    cinfo->global_state = DSTATE_RDCOEFS;
-  }
-  if (cinfo->global_state == DSTATE_RDCOEFS) {
-    /* Absorb whole file into the coef buffer */
-    for (;;) {
-      int retcode;
-      /* Call progress monitor hook if present */
-      if (cinfo->progress != NULL)
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-      /* Absorb some more input */
-      retcode = (*cinfo->inputctl->consume_input) (cinfo);
-      if (retcode == JPEG_SUSPENDED)
-	return NULL;
-      if (retcode == JPEG_REACHED_EOI)
-	break;
-      /* Advance progress counter if appropriate */
-      if (cinfo->progress != NULL &&
-	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	  /* startup underestimated number of scans; ratchet up one scan */
-	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	}
-      }
-    }
-    /* Set state so that jpeg_finish_decompress does the right thing */
-    cinfo->global_state = DSTATE_STOPPING;
-  }
-  /* At this point we should be in state DSTATE_STOPPING if being used
-   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
-   * to the coefficients during a full buffered-image-mode decompression.
-   */
-  if ((cinfo->global_state == DSTATE_STOPPING ||
-       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
-    return cinfo->coef->coef_arrays;
-  }
-  /* Oops, improper usage */
-  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return NULL;			/* keep compiler happy */
-}
-
-
-/*
- * Master selection of decompression modules for transcoding.
- * This substitutes for jdmaster.c's initialization of the full decompressor.
- */
-
-LOCAL(void)
-transdecode_master_selection (j_decompress_ptr cinfo)
-{
-  /* This is effectively a buffered-image operation. */
-  cinfo->buffered_image = TRUE;
-
-  /* Compute output image dimensions and related values. */
-  jpeg_core_output_dimensions(cinfo);
-
-  /* Entropy decoding: either Huffman or arithmetic coding. */
-  if (cinfo->arith_code)
-    jinit_arith_decoder(cinfo);
-  else {
-    jinit_huff_decoder(cinfo);
-  }
-
-  /* Always get a full-image coefficient buffer. */
-  jinit_d_coef_controller(cinfo, TRUE);
-
-  /* We can now tell the memory manager to allocate virtual arrays. */
-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
-
-  /* Initialize input side of decompressor to consume first scan. */
-  (*cinfo->inputctl->start_input_pass) (cinfo);
-
-  /* Initialize progress monitoring. */
-  if (cinfo->progress != NULL) {
-    int nscans;
-    /* Estimate number of scans to set pass_limit. */
-    if (cinfo->progressive_mode) {
-      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
-      nscans = 2 + 3 * cinfo->num_components;
-    } else if (cinfo->inputctl->has_multiple_scans) {
-      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
-      nscans = cinfo->num_components;
-    } else {
-      nscans = 1;
-    }
-    cinfo->progress->pass_counter = 0L;
-    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
-    cinfo->progress->completed_passes = 0;
-    cinfo->progress->total_passes = 1;
-  }
-}
diff --git a/cmake/externals/libjpeg/jerror.c b/cmake/externals/libjpeg/jerror.c
deleted file mode 100644
index 7163af699..000000000
--- a/cmake/externals/libjpeg/jerror.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * jerror.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2012-2015 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains simple error-reporting and trace-message routines.
- * These are suitable for Unix-like systems and others where writing to
- * stderr is the right thing to do.  Many applications will want to replace
- * some or all of these routines.
- *
- * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
- * you get a Windows-specific hack to display error messages in a dialog box.
- * It ain't much, but it beats dropping error messages into the bit bucket,
- * which is what happens to output to stderr under most Windows C compilers.
- *
- * These routines are used by both the compression and decompression code.
- */
-
-#ifdef USE_WINDOWS_MESSAGEBOX
-#include <windows.h>
-#endif
-
-/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jversion.h"
-#include "jerror.h"
-
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
-#define EXIT_FAILURE  1
-#endif
-
-
-/*
- * Create the message string table.
- * We do this from the master message list in jerror.h by re-reading
- * jerror.h with a suitable definition for macro JMESSAGE.
- * The message table is made an external symbol just in case any applications
- * want to refer to it directly.
- */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_message_table	jMsgTable
-#endif
-
-#define JMESSAGE(code,string)	string ,
-
-const char * const jpeg_std_message_table[] = {
-#include "jerror.h"
-  NULL
-};
-
-
-/*
- * Error exit handler: must not return to caller.
- *
- * Applications may override this if they want to get control back after
- * an error.  Typically one would longjmp somewhere instead of exiting.
- * The setjmp buffer can be made a private field within an expanded error
- * handler object.  Note that the info needed to generate an error message
- * is stored in the error object, so you can generate the message now or
- * later, at your convenience.
- * You should make sure that the JPEG object is cleaned up (with jpeg_abort
- * or jpeg_destroy) at some point.
- */
-
-METHODDEF(noreturn_t)
-error_exit (j_common_ptr cinfo)
-{
-  /* Always display the message */
-  (*cinfo->err->output_message) (cinfo);
-
-  /* Let the memory manager delete any temp files before we die */
-  jpeg_destroy(cinfo);
-
-  exit(EXIT_FAILURE);
-}
-
-
-/*
- * Actual output of an error or trace message.
- * Applications may override this method to send JPEG messages somewhere
- * other than stderr.
- *
- * On Windows, printing to stderr is generally completely useless,
- * so we provide optional code to produce an error-dialog popup.
- * Most Windows applications will still prefer to override this routine,
- * but if they don't, it'll do something at least marginally useful.
- *
- * NOTE: to use the library in an environment that doesn't support the
- * C stdio library, you may have to delete the call to fprintf() entirely,
- * not just not use this routine.
- */
-
-METHODDEF(void)
-output_message (j_common_ptr cinfo)
-{
-  char buffer[JMSG_LENGTH_MAX];
-
-  /* Create the message */
-  (*cinfo->err->format_message) (cinfo, buffer);
-
-#ifdef USE_WINDOWS_MESSAGEBOX
-  /* Display it in a message dialog box */
-  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
-	     MB_OK | MB_ICONERROR);
-#else
-  /* Send it to stderr, adding a newline */
-  fprintf(stderr, "%s\n", buffer);
-#endif
-}
-
-
-/*
- * Decide whether to emit a trace or warning message.
- * msg_level is one of:
- *   -1: recoverable corrupt-data warning, may want to abort.
- *    0: important advisory messages (always display to user).
- *    1: first level of tracing detail.
- *    2,3,...: successively more detailed tracing messages.
- * An application might override this method if it wanted to abort on warnings
- * or change the policy about which messages to display.
- */
-
-METHODDEF(void)
-emit_message (j_common_ptr cinfo, int msg_level)
-{
-  struct jpeg_error_mgr * err = cinfo->err;
-
-  if (msg_level < 0) {
-    /* It's a warning message.  Since corrupt files may generate many warnings,
-     * the policy implemented here is to show only the first warning,
-     * unless trace_level >= 3.
-     */
-    if (err->num_warnings == 0 || err->trace_level >= 3)
-      (*err->output_message) (cinfo);
-    /* Always count warnings in num_warnings. */
-    err->num_warnings++;
-  } else {
-    /* It's a trace message.  Show it if trace_level >= msg_level. */
-    if (err->trace_level >= msg_level)
-      (*err->output_message) (cinfo);
-  }
-}
-
-
-/*
- * Format a message string for the most recent JPEG error or message.
- * The message is stored into buffer, which should be at least JMSG_LENGTH_MAX
- * characters.  Note that no '\n' character is added to the string.
- * Few applications should need to override this method.
- */
-
-METHODDEF(void)
-format_message (j_common_ptr cinfo, char * buffer)
-{
-  struct jpeg_error_mgr * err = cinfo->err;
-  int msg_code = err->msg_code;
-  const char * msgtext = NULL;
-  const char * msgptr;
-  char ch;
-  boolean isstring;
-
-  /* Look up message string in proper table */
-  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
-    msgtext = err->jpeg_message_table[msg_code];
-  } else if (err->addon_message_table != NULL &&
-	     msg_code >= err->first_addon_message &&
-	     msg_code <= err->last_addon_message) {
-    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
-  }
-
-  /* Defend against bogus message number */
-  if (msgtext == NULL) {
-    err->msg_parm.i[0] = msg_code;
-    msgtext = err->jpeg_message_table[0];
-  }
-
-  /* Check for string parameter, as indicated by %s in the message text */
-  isstring = FALSE;
-  msgptr = msgtext;
-  while ((ch = *msgptr++) != '\0') {
-    if (ch == '%') {
-      if (*msgptr == 's') isstring = TRUE;
-      break;
-    }
-  }
-
-  /* Format the message into the passed buffer */
-  if (isstring)
-    sprintf(buffer, msgtext, err->msg_parm.s);
-  else
-    sprintf(buffer, msgtext,
-	    err->msg_parm.i[0], err->msg_parm.i[1],
-	    err->msg_parm.i[2], err->msg_parm.i[3],
-	    err->msg_parm.i[4], err->msg_parm.i[5],
-	    err->msg_parm.i[6], err->msg_parm.i[7]);
-}
-
-
-/*
- * Reset error state variables at start of a new image.
- * This is called during compression startup to reset trace/error
- * processing to default state, without losing any application-specific
- * method pointers.  An application might possibly want to override
- * this method if it has additional error processing state.
- */
-
-METHODDEF(void)
-reset_error_mgr (j_common_ptr cinfo)
-{
-  cinfo->err->num_warnings = 0;
-  /* trace_level is not reset since it is an application-supplied parameter */
-  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
-}
-
-
-/*
- * Fill in the standard error-handling methods in a jpeg_error_mgr object.
- * Typical call is:
- *	struct jpeg_compress_struct cinfo;
- *	struct jpeg_error_mgr err;
- *
- *	cinfo.err = jpeg_std_error(&err);
- * after which the application may override some of the methods.
- */
-
-GLOBAL(struct jpeg_error_mgr *)
-jpeg_std_error (struct jpeg_error_mgr * err)
-{
-  err->error_exit = error_exit;
-  err->emit_message = emit_message;
-  err->output_message = output_message;
-  err->format_message = format_message;
-  err->reset_error_mgr = reset_error_mgr;
-
-  err->trace_level = 0;		/* default = no tracing */
-  err->num_warnings = 0;	/* no warnings emitted yet */
-  err->msg_code = 0;		/* may be useful as a flag for "no error" */
-
-  /* Initialize message table pointers */
-  err->jpeg_message_table = jpeg_std_message_table;
-  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
-
-  err->addon_message_table = NULL;
-  err->first_addon_message = 0;	/* for safety */
-  err->last_addon_message = 0;
-
-  return err;
-}
diff --git a/cmake/externals/libjpeg/jerror.h b/cmake/externals/libjpeg/jerror.h
deleted file mode 100644
index db608b9c0..000000000
--- a/cmake/externals/libjpeg/jerror.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * jerror.h
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 1997-2018 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the error and message codes for the JPEG library.
- * Edit this file to add new codes, or to translate the message strings to
- * some other language.
- * A set of error-reporting macros are defined too.  Some applications using
- * the JPEG library may wish to include this file to get the error codes
- * and/or the macros.
- */
-
-/*
- * To define the enum list of message codes, include this file without
- * defining macro JMESSAGE.  To create a message string table, include it
- * again with a suitable JMESSAGE definition (see jerror.c for an example).
- */
-#ifndef JMESSAGE
-#ifndef JERROR_H
-/* First time through, define the enum list */
-#define JMAKE_ENUM_LIST
-#else
-/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
-#define JMESSAGE(code,string)
-#endif /* JERROR_H */
-#endif /* JMESSAGE */
-
-#ifdef JMAKE_ENUM_LIST
-
-typedef enum {
-
-#define JMESSAGE(code,string)	code ,
-
-#endif /* JMAKE_ENUM_LIST */
-
-JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
-
-/* For maintenance convenience, list is alphabetical by message code name */
-JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
-JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
-JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
-JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
-JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
-JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
-JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported")
-JMESSAGE(JERR_BAD_DROP_SAMPLING,
-	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
-JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
-JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
-JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
-JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
-JMESSAGE(JERR_BAD_LIB_VERSION,
-	 "Wrong JPEG library version: library is %d, caller expects %d")
-JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
-JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
-JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
-JMESSAGE(JERR_BAD_PROGRESSION,
-	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
-JMESSAGE(JERR_BAD_PROG_SCRIPT,
-	 "Invalid progressive parameters at scan script entry %d")
-JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
-JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
-JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
-JMESSAGE(JERR_BAD_STRUCT_SIZE,
-	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
-JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
-JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
-JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
-JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
-JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
-JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
-JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
-JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
-JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
-JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
-JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
-JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
-JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
-JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
-JMESSAGE(JERR_FILE_READ, "Input file read error")
-JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
-JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
-JMESSAGE(JERR_HUFF_CLEN_OUTOFBOUNDS, "Huffman code size table out of bounds")
-JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
-JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
-JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
-JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
-JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
-	 "Cannot transcode due to multiple use of quantization table %d")
-JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
-JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
-JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
-JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
-JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
-JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
-JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
-JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
-JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
-JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
-JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
-JMESSAGE(JERR_QUANT_COMPONENTS,
-	 "Cannot quantize more than %d color components")
-JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
-JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
-JMESSAGE(JERR_SOF_BEFORE, "Invalid JPEG file structure: %s before SOF")
-JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
-JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
-JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
-JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
-JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
-JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
-JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
-JMESSAGE(JERR_TFILE_WRITE,
-	 "Write failed on temporary file --- out of disk space?")
-JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
-JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
-JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
-JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
-JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
-JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
-JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
-JMESSAGE(JMSG_VERSION, JVERSION)
-JMESSAGE(JTRC_16BIT_TABLES,
-	 "Caution: quantization tables are too coarse for baseline JPEG")
-JMESSAGE(JTRC_ADOBE,
-	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
-JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
-JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
-JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
-JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
-JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
-JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
-JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
-JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
-JMESSAGE(JTRC_EOI, "End Of Image")
-JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
-JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
-JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
-	 "Warning: thumbnail image size does not match data length %u")
-JMESSAGE(JTRC_JFIF_EXTENSION,
-	 "JFIF extension marker: type 0x%02x, length %u")
-JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
-JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
-JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
-JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
-JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
-JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
-JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
-JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
-JMESSAGE(JTRC_RST, "RST%d")
-JMESSAGE(JTRC_SMOOTH_NOTIMPL,
-	 "Smoothing not supported with nonstandard sampling ratios")
-JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
-JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
-JMESSAGE(JTRC_SOI, "Start of Image")
-JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
-JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
-JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
-JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
-JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
-JMESSAGE(JTRC_THUMB_JPEG,
-	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
-JMESSAGE(JTRC_THUMB_PALETTE,
-	 "JFIF extension marker: palette thumbnail image, length %u")
-JMESSAGE(JTRC_THUMB_RGB,
-	 "JFIF extension marker: RGB thumbnail image, length %u")
-JMESSAGE(JTRC_UNKNOWN_IDS,
-	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
-JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
-JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
-JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
-JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
-JMESSAGE(JWRN_BOGUS_PROGRESSION,
-	 "Inconsistent progression sequence for component %d coefficient %d")
-JMESSAGE(JWRN_EXTRANEOUS_DATA,
-	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
-JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
-JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
-JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
-JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
-JMESSAGE(JWRN_MUST_RESYNC,
-	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
-JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
-JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
-
-#ifdef JMAKE_ENUM_LIST
-
-  JMSG_LASTMSGCODE
-} J_MESSAGE_CODE;
-
-#undef JMAKE_ENUM_LIST
-#endif /* JMAKE_ENUM_LIST */
-
-/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
-#undef JMESSAGE
-
-
-#ifndef JERROR_H
-#define JERROR_H
-
-/* Macros to simplify using the error and trace message stuff */
-/* The first parameter is either type of cinfo pointer */
-
-/* Fatal errors (print message and exit) */
-#define ERREXIT(cinfo,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT1(cinfo,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT2(cinfo,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT3(cinfo,code,p1,p2,p3)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (cinfo)->err->msg_parm.i[3] = (p4), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (cinfo)->err->msg_parm.i[2] = (p3), \
-   (cinfo)->err->msg_parm.i[3] = (p4), \
-   (cinfo)->err->msg_parm.i[4] = (p5), \
-   (cinfo)->err->msg_parm.i[5] = (p6), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-#define ERREXITS(cinfo,code,str)  \
-  ((cinfo)->err->msg_code = (code), \
-   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
-   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
-
-#define MAKESTMT(stuff)		do { stuff } while (0)
-
-/* Nonfatal errors (we can keep going, but the data is probably corrupt) */
-#define WARNMS(cinfo,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-#define WARNMS1(cinfo,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-#define WARNMS2(cinfo,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
-
-/* Informational/debugging messages */
-#define TRACEMS(cinfo,lvl,code)  \
-  ((cinfo)->err->msg_code = (code), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS1(cinfo,lvl,code,p1)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
-  ((cinfo)->err->msg_code = (code), \
-   (cinfo)->err->msg_parm.i[0] = (p1), \
-   (cinfo)->err->msg_parm.i[1] = (p2), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
-  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
-#define TRACEMSS(cinfo,lvl,code,str)  \
-  ((cinfo)->err->msg_code = (code), \
-   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
-   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
-
-#endif /* JERROR_H */
diff --git a/cmake/externals/libjpeg/jfdctflt.c b/cmake/externals/libjpeg/jfdctflt.c
deleted file mode 100644
index 013f29e0c..000000000
--- a/cmake/externals/libjpeg/jfdctflt.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * jfdctflt.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a floating-point implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * This implementation should be more accurate than either of the integer
- * DCT implementations.  However, it may not give the same results on all
- * machines because of differences in roundoff behavior.  Speed will depend
- * on the hardware's floating point capacity.
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with a fixed-point
- * implementation, accuracy is lost due to imprecise representation of the
- * scaled quantization values.  However, that problem does not arise if
- * we use floating point arithmetic.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * Perform the forward DCT on one block of samples.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
-  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
-  FAST_FLOAT *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-
-  /* Pass 1: process rows. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Load data into workspace */
-    tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]));
-    tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]));
-    tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]));
-    tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]));
-    tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]));
-    tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]));
-    tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]));
-    tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]));
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
-    dataptr[4] = tmp10 - tmp11;
-
-    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
-    dataptr[6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
-    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
-    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
-    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[5] = z13 + z2;	/* phase 6 */
-    dataptr[3] = z13 - z2;
-    dataptr[1] = z11 + z4;
-    dataptr[7] = z11 - z4;
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns. */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
-    dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
-    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
-    dataptr[DCTSIZE*6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
-    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
-    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
-    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
-    dataptr[DCTSIZE*3] = z13 - z2;
-    dataptr[DCTSIZE*1] = z11 + z4;
-    dataptr[DCTSIZE*7] = z11 - z4;
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jfdctfst.c b/cmake/externals/libjpeg/jfdctfst.c
deleted file mode 100644
index 5e4e01722..000000000
--- a/cmake/externals/libjpeg/jfdctfst.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * jfdctfst.c
- *
- * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2003-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a fast, not so accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with fixed-point math,
- * accuracy is lost due to imprecise representation of the scaled
- * quantization values.  The smaller the quantization table entry, the less
- * precise the scaled value, so this implementation does worse with high-
- * quality-setting files than with low-quality ones.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_IFAST_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/* Scaling decisions are generally the same as in the LL&M algorithm;
- * see jfdctint.c for more details.  However, we choose to descale
- * (right shift) multiplication products as soon as they are formed,
- * rather than carrying additional fractional bits into subsequent additions.
- * This compromises accuracy slightly, but it lets us save a few shifts.
- * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
- * everywhere except in the multiplications proper; this saves a good deal
- * of work on 16-bit-int machines.
- *
- * Again to save a few shifts, the intermediate results between pass 1 and
- * pass 2 are not upscaled, but are represented only to integral precision.
- *
- * A final compromise is to represent the multiplicative constants to only
- * 8 fractional bits, rather than 13.  This saves some shifting work on some
- * machines, and may also reduce the cost of multiplication (since there
- * are fewer one-bits in the constants).
- */
-
-#define CONST_BITS  8
-
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 8
-#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
-#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
-#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
-#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
-#else
-#define FIX_0_382683433  FIX(0.382683433)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_707106781  FIX(0.707106781)
-#define FIX_1_306562965  FIX(1.306562965)
-#endif
-
-
-/* We can gain a little more speed, with a further compromise in accuracy,
- * by omitting the addition in a descaling shift.  This yields an incorrectly
- * rounded result half the time...
- */
-
-#ifndef USE_ACCURATE_ROUNDING
-#undef DESCALE
-#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
-#endif
-
-
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
- * descale to yield a DCTELEM result.
- */
-
-#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-
-
-/*
- * Perform the forward DCT on one block of samples.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  DCTELEM tmp10, tmp11, tmp12, tmp13;
-  DCTELEM z1, z2, z3, z4, z5, z11, z13;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows. */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Load data into workspace */
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-    tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
-    dataptr[4] = tmp10 - tmp11;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
-    dataptr[6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[5] = z13 + z2;	/* phase 6 */
-    dataptr[3] = z13 - z2;
-    dataptr[1] = z11 + z4;
-    dataptr[7] = z11 - z4;
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns. */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    /* Even part */
-
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
-    tmp13 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp12 = tmp1 - tmp2;
-
-    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
-    dataptr[DCTSIZE*4] = tmp10 - tmp11;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
-    dataptr[DCTSIZE*6] = tmp13 - z1;
-
-    /* Odd part */
-
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
-    tmp11 = tmp5 + tmp6;
-    tmp12 = tmp6 + tmp7;
-
-    /* The rotator is modified from fig 4-8 to avoid extra negations. */
-    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
-
-    z11 = tmp7 + z3;		/* phase 5 */
-    z13 = tmp7 - z3;
-
-    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
-    dataptr[DCTSIZE*3] = z13 - z2;
-    dataptr[DCTSIZE*1] = z11 + z4;
-    dataptr[DCTSIZE*7] = z11 - z4;
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#endif /* DCT_IFAST_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jfdctint.c b/cmake/externals/libjpeg/jfdctint.c
deleted file mode 100644
index 05df4750b..000000000
--- a/cmake/externals/libjpeg/jfdctint.c
+++ /dev/null
@@ -1,4415 +0,0 @@
-/*
- * jfdctint.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modification developed 2003-2018 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column.  Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- *
- * We also provide FDCT routines with various input sample block sizes for
- * direct resolution reduction or enlargement and for direct resolving the
- * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
- * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
- *
- * For N<8 we fill the remaining block coefficients with zero.
- * For N>8 we apply a partial N-point FDCT on the input samples, computing
- * just the lower 8 frequency coefficients and discarding the rest.
- *
- * We must scale the output coefficients of the N-point FDCT appropriately
- * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
- * is folded into the constant multipliers (pass 2) and/or final/initial
- * shifting.
- *
- * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
- * since there would be too many additional constants to pre-calculate.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_ISLOW_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
- *
- * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
- * larger than the true DCT outputs.  The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm.  The advantage of
- * this arrangement is that we save two multiplications per 1-D DCT,
- * because the y0 and y4 outputs need not be divided by sqrt(N).
- * In the IJG code, this factor of 8 is removed by the quantization step
- * (in jcdctmgr.c), NOT in this module.
- *
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic.  We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants).  After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output.  This division can be done
- * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
- *
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision.  These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling.  (For 12-bit sample data, the intermediate
- * array is INT32 anyway.)
- *
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
- * shows that the values given below are the most effective.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  13
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
-#else
-#define FIX_0_298631336  FIX(0.298631336)
-#define FIX_0_390180644  FIX(0.390180644)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_765366865  FIX(0.765366865)
-#define FIX_0_899976223  FIX(0.899976223)
-#define FIX_1_175875602  FIX(1.175875602)
-#define FIX_1_501321110  FIX(1.501321110)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_1_961570560  FIX(1.961570560)
-#define FIX_2_053119869  FIX(2.053119869)
-#define FIX_2_562915447  FIX(2.562915447)
-#define FIX_3_072711026  FIX(3.072711026)
-#endif
-
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const)  ((var) * (const))
-#endif
-
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-
-GLOBAL(void)
-jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    dataptr[2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-#ifdef DCT_SCALING_SUPPORTED
-
-
-/*
- * Perform the forward DCT on a 7x7 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
-    tmp3 = GETJSAMPLE(elemptr[3]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
-
-    z1 = tmp0 + tmp2;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/7)**2 = 64/49, which we fold
-   * into the constant multipliers:
-   * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
-
-    z1 = tmp0 + tmp2;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x6 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)**2 = 16/9, which we fold
-   * into the constant multipliers:
-   * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 5x5 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
-    tmp2 = GETJSAMPLE(elemptr[2]);
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
-    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
-    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
-	      CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/5)**2 = 64/25, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2];
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x4 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We must also scale the output by (8/4)**2 = 2**2, which we add here.
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
-    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-2);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-2);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 3x3 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2**2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
-	      CONST_BITS-PASS1_BITS-2);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
-	      CONST_BITS-PASS1_BITS-2);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/3)**2 = 64/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1];
-
-    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 2x2 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3;
-  JSAMPROW elemptr;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   */
-
-  /* Row 0 */
-  elemptr = sample_data[0] + start_col;
-
-  tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
-  tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
-
-  /* Row 1 */
-  elemptr = sample_data[1] + start_col;
-
-  tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
-  tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)**2 = 2**4.
-   */
-
-  /* Column 0 */
-  /* Apply unsigned->signed conversion. */
-  data[DCTSIZE*0] = (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4;
-  data[DCTSIZE*1] = (tmp0 - tmp2) << 4;
-
-  /* Column 1 */
-  data[DCTSIZE*0+1] = (tmp1 + tmp3) << 4;
-  data[DCTSIZE*1+1] = (tmp1 - tmp3) << 4;
-}
-
-
-/*
- * Perform the forward DCT on a 1x1 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM dcval;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  dcval = GETJSAMPLE(sample_data[0][start_col]);
-
-  /* We leave the result scaled up by an overall factor of 8. */
-  /* We must also scale the output by (8/1)**2 = 2**6. */
-  /* Apply unsigned->signed conversion. */
-  data[0] = (dcval - CENTERJSAMPLE) << 6;
-}
-
-
-/*
- * Perform the forward DCT on a 9x9 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2;
-  DCTELEM workspace[8];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * we scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/18).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
-    tmp4 = GETJSAMPLE(elemptr[4]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
-
-    z1 = tmp0 + tmp2 + tmp3;
-    z2 = tmp1 + tmp4;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)),  /* c6 */
-	      CONST_BITS-1);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049));        /* c2 */
-    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441))    /* c4 */
-	      + z1 + z2, CONST_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608))    /* c8 */
-	      + z1 - z2, CONST_BITS-1);
-
-    /* Odd part */
-
-    dataptr[3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
-	      CONST_BITS-1);
-
-    tmp11 = MULTIPLY(tmp11, FIX(1.224744871));        /* c3 */
-    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
-    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
-
-    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
-
-    dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 9)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/9)**2 = 64/81, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
-    tmp4 = dataptr[DCTSIZE*4];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
-    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
-
-    z1 = tmp0 + tmp2 + tmp3;
-    z2 = tmp1 + tmp4;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)),       /* 128/81 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)),  /* c6 */
-	      CONST_BITS+2);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287));        /* c2 */
-    z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190))    /* c4 */
-	      + z1 + z2, CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096))    /* c8 */
-	      + z1 - z2, CONST_BITS+2);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
-	      CONST_BITS+2);
-
-    tmp11 = MULTIPLY(tmp11, FIX(1.935399303));        /* c3 */
-    tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
-    tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
-
-    tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
-
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 10x10 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM workspace[8*2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * we scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
-    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
-    tmp12 += tmp12;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
-	      CONST_BITS-1);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
-	      CONST_BITS-1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
-	      CONST_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
-    tmp2 <<= CONST_BITS;
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
-	      CONST_BITS-1);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
-	    (tmp11 << (CONST_BITS - 1)) - tmp2;
-    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 10)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/10)**2 = 16/25, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
-    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
-	      CONST_BITS+2);
-    tmp12 += tmp12;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
-	      CONST_BITS+2);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
-	      CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
-	      CONST_BITS+2);
-    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
-	      CONST_BITS+2);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
-	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on an 11x11 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*3];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * we scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * cK represents sqrt(2) * cos(K*pi/22).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
-    tmp5 = GETJSAMPLE(elemptr[5]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
-    tmp5 += tmp5;
-    tmp0 -= tmp5;
-    tmp1 -= tmp5;
-    tmp2 -= tmp5;
-    tmp3 -= tmp5;
-    tmp4 -= tmp5;
-    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) +       /* c2 */
-	 MULTIPLY(tmp2 + tmp4, FIX(0.201263574));        /* c10 */
-    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931));        /* c6 */
-    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156));        /* c4 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
-	      - MULTIPLY(tmp4, FIX(1.390975730)),        /* c4+c10 */
-	      CONST_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
-	      - MULTIPLY(tmp2, FIX(1.356927976))         /* c2 */
-	      + MULTIPLY(tmp4, FIX(0.587485545)),        /* c8 */
-	      CONST_BITS-1);
-    dataptr[6] = (DCTELEM)
-      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
-	      - MULTIPLY(tmp2, FIX(0.788749120)),        /* c8+c10 */
-	      CONST_BITS-1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905));    /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298));    /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576));    /* c7 */
-    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
-	   + MULTIPLY(tmp14, FIX(0.398430003));          /* c9 */
-    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576));  /* -c7 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907));  /* -c1 */
-    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
-	    - MULTIPLY(tmp14, FIX(1.068791298));         /* c5 */
-    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003));   /* c9 */
-    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
-	    + MULTIPLY(tmp14, FIX(1.399818907));         /* c1 */
-    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
-	    - MULTIPLY(tmp14, FIX(1.286413905));         /* c3 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 11)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/11)**2 = 64/121, which we partially
-   * fold into the constant multipliers and final/initial shifting:
-   * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
-    tmp5 = dataptr[DCTSIZE*5];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
-    tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
-    tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
-		       FIX(1.057851240)),                /* 128/121 */
-	      CONST_BITS+2);
-    tmp5 += tmp5;
-    tmp0 -= tmp5;
-    tmp1 -= tmp5;
-    tmp2 -= tmp5;
-    tmp3 -= tmp5;
-    tmp4 -= tmp5;
-    z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) +       /* c2 */
-	 MULTIPLY(tmp2 + tmp4, FIX(0.212906922));        /* c10 */
-    z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713));        /* c6 */
-    z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479));        /* c4 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
-	      - MULTIPLY(tmp4, FIX(1.471445400)),        /* c4+c10 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
-	      - MULTIPLY(tmp2, FIX(1.435427942))         /* c2 */
-	      + MULTIPLY(tmp4, FIX(0.621472312)),        /* c8 */
-	      CONST_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
-	      - MULTIPLY(tmp2, FIX(0.834379234)),        /* c8+c10 */
-	      CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544));    /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199));    /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568));    /* c7 */
-    tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
-	   + MULTIPLY(tmp14, FIX(0.421479672));          /* c9 */
-    tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568));  /* -c7 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167));  /* -c1 */
-    tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
-	    - MULTIPLY(tmp14, FIX(1.130622199));         /* c5 */
-    tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672));   /* c9 */
-    tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
-	    + MULTIPLY(tmp14, FIX(1.480800167));         /* c1 */
-    tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
-	    - MULTIPLY(tmp14, FIX(1.360834544));         /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 12x12 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM workspace[8*4];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
-    dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
-	      CONST_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
-	      CONST_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 12)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/12)**2 = 4/9, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
-	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
-	      CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 13x13 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 z1, z2;
-  DCTELEM workspace[8*5];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/26).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
-    tmp6 = GETJSAMPLE(elemptr[6]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
-    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
-    tmp6 += tmp6;
-    tmp0 -= tmp6;
-    tmp1 -= tmp6;
-    tmp2 -= tmp6;
-    tmp3 -= tmp6;
-    tmp4 -= tmp6;
-    tmp5 -= tmp6;
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) +   /* c2 */
-	      MULTIPLY(tmp1, FIX(1.058554052)) +   /* c6 */
-	      MULTIPLY(tmp2, FIX(0.501487041)) -   /* c10 */
-	      MULTIPLY(tmp3, FIX(0.170464608)) -   /* c12 */
-	      MULTIPLY(tmp4, FIX(0.803364869)) -   /* c8 */
-	      MULTIPLY(tmp5, FIX(1.252223920)),    /* c4 */
-	      CONST_BITS);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
-	 MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
-	 MULTIPLY(tmp1 - tmp5, FIX(0.316450131));  /* (c8-c12)/2 */
-    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
-	 MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
-	 MULTIPLY(tmp1 + tmp5, FIX(0.486914739));  /* (c8+c12)/2 */
-
-    dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651));   /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945));   /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) +  /* c7 */
-	   MULTIPLY(tmp14 + tmp15, FIX(0.338443458));   /* c11 */
-    tmp0 = tmp1 + tmp2 + tmp3 -
-	   MULTIPLY(tmp10, FIX(2.020082300)) +          /* c3+c5+c7-c1 */
-	   MULTIPLY(tmp14, FIX(0.318774355));           /* c9-c11 */
-    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) -  /* c7 */
-	   MULTIPLY(tmp11 + tmp12, FIX(0.338443458));   /* c11 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
-    tmp1 += tmp4 + tmp5 +
-	    MULTIPLY(tmp11, FIX(0.837223564)) -         /* c5+c9+c11-c3 */
-	    MULTIPLY(tmp14, FIX(2.341699410));          /* c1+c7 */
-    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
-    tmp2 += tmp4 + tmp6 -
-	    MULTIPLY(tmp12, FIX(1.572116027)) +         /* c1+c5-c9-c11 */
-	    MULTIPLY(tmp15, FIX(2.260109708));          /* c3+c7 */
-    tmp3 += tmp5 + tmp6 +
-	    MULTIPLY(tmp13, FIX(2.205608352)) -         /* c3+c5+c9-c7 */
-	    MULTIPLY(tmp15, FIX(1.742345811));          /* c1+c11 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 13)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/13)**2 = 64/169, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
-    tmp6 = dataptr[DCTSIZE*6];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
-    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
-    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
-    tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
-		       FIX(0.757396450)),          /* 128/169 */
-	      CONST_BITS+1);
-    tmp6 += tmp6;
-    tmp0 -= tmp6;
-    tmp1 -= tmp6;
-    tmp2 -= tmp6;
-    tmp3 -= tmp6;
-    tmp4 -= tmp6;
-    tmp5 -= tmp6;
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) +   /* c2 */
-	      MULTIPLY(tmp1, FIX(0.801745081)) +   /* c6 */
-	      MULTIPLY(tmp2, FIX(0.379824504)) -   /* c10 */
-	      MULTIPLY(tmp3, FIX(0.129109289)) -   /* c12 */
-	      MULTIPLY(tmp4, FIX(0.608465700)) -   /* c8 */
-	      MULTIPLY(tmp5, FIX(0.948429952)),    /* c4 */
-	      CONST_BITS+1);
-    z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
-	 MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
-	 MULTIPLY(tmp1 - tmp5, FIX(0.239678205));  /* (c8-c12)/2 */
-    z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
-	 MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
-	 MULTIPLY(tmp1 + tmp5, FIX(0.368787494));  /* (c8+c12)/2 */
-
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908));   /* c3 */
-    tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751));   /* c5 */
-    tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) +  /* c7 */
-	   MULTIPLY(tmp14 + tmp15, FIX(0.256335874));   /* c11 */
-    tmp0 = tmp1 + tmp2 + tmp3 -
-	   MULTIPLY(tmp10, FIX(1.530003162)) +          /* c3+c5+c7-c1 */
-	   MULTIPLY(tmp14, FIX(0.241438564));           /* c9-c11 */
-    tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) -  /* c7 */
-	   MULTIPLY(tmp11 + tmp12, FIX(0.256335874));   /* c11 */
-    tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
-    tmp1 += tmp4 + tmp5 +
-	    MULTIPLY(tmp11, FIX(0.634110155)) -         /* c5+c9+c11-c3 */
-	    MULTIPLY(tmp14, FIX(1.773594819));          /* c1+c7 */
-    tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
-    tmp2 += tmp4 + tmp6 -
-	    MULTIPLY(tmp12, FIX(1.190715098)) +         /* c1+c5-c9-c11 */
-	    MULTIPLY(tmp15, FIX(1.711799069));          /* c3+c7 */
-    tmp3 += tmp5 + tmp6 +
-	    MULTIPLY(tmp13, FIX(1.670519935)) -         /* c3+c5+c9-c7 */
-	    MULTIPLY(tmp15, FIX(1.319646532));          /* c1+c11 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 14x14 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  DCTELEM workspace[8*6];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
-    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
-    tmp13 += tmp13;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
-	      CONST_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
-	      CONST_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
-	      CONST_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
-    tmp3 <<= CONST_BITS;
-    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
-    dataptr[5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
-	      CONST_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
-	      CONST_BITS);
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
-	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
-	      CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 14)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/14)**2 = 16/49, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
-    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+1);
-    tmp13 += tmp13;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
-	      CONST_BITS+1);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
-	      CONST_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+1);
-    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
-    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
-	      CONST_BITS+1);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
-	      CONST_BITS+1);
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3
-	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
-	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
-	      CONST_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 15x15 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*7];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * cK represents sqrt(2) * cos(K*pi/30).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
-    tmp7 = GETJSAMPLE(elemptr[7]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
-    tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
-    tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
-    tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
-    tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
-
-    z1 = tmp0 + tmp4 + tmp5;
-    z2 = tmp1 + tmp3 + tmp6;
-    z3 = tmp2 + tmp7;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
-    z3 += z3;
-    dataptr[6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
-	      MULTIPLY(z2 - z3, FIX(0.437016024)),  /* c12 */
-	      CONST_BITS);
-    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
-    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) -  /* c2+c14 */
-         MULTIPLY(tmp6 - tmp2, FIX(2.238241955));   /* c4+c8 */
-    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) -  /* c8-c14 */
-	 MULTIPLY(tmp0 - tmp2, FIX(0.091361227));   /* c2-c4 */
-    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) +  /* c2 */
-	 MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) +  /* c8 */
-	 MULTIPLY(tmp1 - tmp4, FIX(0.790569415));   /* (c6+c12)/2 */
-
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
-    dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
-
-    /* Odd part */
-
-    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
-		    FIX(1.224744871));                         /* c5 */
-    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
-	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876));  /* c9 */
-    tmp12 = MULTIPLY(tmp12, FIX(1.224744871));                 /* c5 */
-    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) +         /* c1 */
-	   MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) +         /* c3 */
-	   MULTIPLY(tmp13 + tmp15, FIX(0.575212477));          /* c11 */
-    tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) -                 /* c7-c11 */
-	   MULTIPLY(tmp14, FIX(0.513743148)) +                 /* c3-c9 */
-	   MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12;   /* c1+c13 */
-    tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) -               /* -(c1-c7) */
-	   MULTIPLY(tmp11, FIX(2.176250899)) -                 /* c3+c9 */
-	   MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12;   /* c11+c13 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 15)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/15)**2 = 64/225, which we partially
-   * fold into the constant multipliers and final shifting:
-   * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
-    tmp7 = dataptr[DCTSIZE*7];
-
-    tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
-    tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
-    tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
-    tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
-    tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
-
-    z1 = tmp0 + tmp4 + tmp5;
-    z2 = tmp1 + tmp3 + tmp6;
-    z3 = tmp2 + tmp7;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
-	      CONST_BITS+2);
-    z3 += z3;
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
-	      MULTIPLY(z2 - z3, FIX(0.497227121)),  /* c12 */
-	      CONST_BITS+2);
-    tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
-    z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) -  /* c2+c14 */
-         MULTIPLY(tmp6 - tmp2, FIX(2.546621957));   /* c4+c8 */
-    z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) -  /* c8-c14 */
-	 MULTIPLY(tmp0 - tmp2, FIX(0.103948774));   /* c2-c4 */
-    z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) +  /* c2 */
-	 MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) +  /* c8 */
-	 MULTIPLY(tmp1 - tmp4, FIX(0.899492312));   /* (c6+c12)/2 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
-
-    /* Odd part */
-
-    tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
-		    FIX(1.393487498));                         /* c5 */
-    tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
-	   MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187));  /* c9 */
-    tmp12 = MULTIPLY(tmp12, FIX(1.393487498));                 /* c5 */
-    tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) +         /* c1 */
-	   MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) +         /* c3 */
-	   MULTIPLY(tmp13 + tmp15, FIX(0.654463974));          /* c11 */
-    tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) -                 /* c7-c11 */
-	   MULTIPLY(tmp14, FIX(0.584525538)) +                 /* c3-c9 */
-	   MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12;   /* c1+c13 */
-    tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) -               /* -(c1-c7) */
-	   MULTIPLY(tmp11, FIX(2.476089912)) -                 /* c3+c9 */
-	   MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12;   /* c11+c13 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 16x16 sample block.
- */
-
-GLOBAL(void)
-jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  DCTELEM workspace[DCTSIZE2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == DCTSIZE * 2)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/16)**2 = 1/2**2.
-   * cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS+PASS1_BITS+2);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+10 */
-	      CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS+PASS1_BITS+2);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 16x8 sample block.
- *
- * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
-    tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by 8/16 = 1/2.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 14x7 sample block.
- *
- * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero bottom row of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 7; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
-    tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
-    tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp13 += tmp13;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
-	      CONST_BITS-PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
-
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
-    tmp3 <<= CONST_BITS;
-    tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
-    dataptr[5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
-	      CONST_BITS-PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
-	      MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
-	      CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/14)*(8/7) = 32/49, which we
-   * partially fold into the constant multipliers and final shifting:
-   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
-    tmp3 = dataptr[DCTSIZE*3];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
-    tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
-
-    z1 = tmp0 + tmp2;
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
-	      CONST_BITS+PASS1_BITS+1);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 12x6 sample block.
- *
- * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 2 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
-    tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/12)*(8/6) = 8/9, which we
-   * partially fold into the constant multipliers and final shifting:
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 10x5 sample block.
- *
- * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 3 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 5; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
-    tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
-    tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp12 += tmp12;
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
-	      CONST_BITS-PASS1_BITS);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
-    tmp2 <<= CONST_BITS;
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
-	      CONST_BITS-PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
-	    (tmp11 << (CONST_BITS - 1)) - tmp2;
-    dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/10)*(8/5) = 32/25, which we
-   * fold into the constant multipliers:
-   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
-    tmp2 = dataptr[DCTSIZE*2];
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on an 8x4 sample block.
- *
- * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Zero 4 bottom rows of output coefficient block. */
-  MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We must also scale the output by 8/4 = 2, which we add here.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    dataptr[2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-1);
-    dataptr[6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
-    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
-    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    /* Add fudge factor here for final descale. */
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x3 sample block.
- *
- * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS-1);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS-1);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
-    tmp1 = dataptr[DCTSIZE*1];
-
-    tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x2 sample block.
- *
- * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp2, tmp10, tmp12, tmp4, tmp5;
-  INT32 tmp1, tmp3, tmp11, tmp13;
-  INT32 z1, z2, z3;
-  JSAMPROW elemptr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  /* Row 0 */
-  elemptr = sample_data[0] + start_col;
-
-  /* Even part */
-
-  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-  tmp0 = tmp4 + tmp5;
-  tmp2 = tmp4 - tmp5;
-
-  /* Odd part */
-
-  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
-  /* Add fudge factor here for final descale. */
-  z1 += ONE << (CONST_BITS-3-1);
-  tmp1 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-  tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-  /* Row 1 */
-  elemptr = sample_data[1] + start_col;
-
-  /* Even part */
-
-  tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-  tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-  tmp10 = tmp4 + tmp5;
-  tmp12 = tmp4 - tmp5;
-
-  /* Odd part */
-
-  z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-  z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-  z1 = MULTIPLY(z2 + z3, FIX_0_541196100);    /* c6 */
-  tmp11 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-  tmp13 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/4)*(8/2) = 2**3.
-   */
-
-  /* Column 0 */
-  /* Apply unsigned->signed conversion. */
-  data[DCTSIZE*0] = (tmp0 + tmp10 - 8 * CENTERJSAMPLE) << 3;
-  data[DCTSIZE*1] = (tmp0 - tmp10) << 3;
-
-  /* Column 1 */
-  data[DCTSIZE*0+1] = (DCTELEM) RIGHT_SHIFT(tmp1 + tmp11, CONST_BITS-3);
-  data[DCTSIZE*1+1] = (DCTELEM) RIGHT_SHIFT(tmp1 - tmp11, CONST_BITS-3);
-
-  /* Column 2 */
-  data[DCTSIZE*0+2] = (tmp2 + tmp12) << 3;
-  data[DCTSIZE*1+2] = (tmp2 - tmp12) << 3;
-
-  /* Column 3 */
-  data[DCTSIZE*0+3] = (DCTELEM) RIGHT_SHIFT(tmp3 + tmp13, CONST_BITS-3);
-  data[DCTSIZE*1+3] = (DCTELEM) RIGHT_SHIFT(tmp3 - tmp13, CONST_BITS-3);
-}
-
-
-/*
- * Perform the forward DCT on a 2x1 sample block.
- *
- * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1;
-  JSAMPROW elemptr;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  elemptr = sample_data[0] + start_col;
-
-  tmp0 = GETJSAMPLE(elemptr[0]);
-  tmp1 = GETJSAMPLE(elemptr[1]);
-
-  /* We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)*(8/1) = 2**5.
-   */
-
-  /* Even part */
-
-  /* Apply unsigned->signed conversion. */
-  data[0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
-
-  /* Odd part */
-
-  data[1] = (tmp0 - tmp1) << 5;
-}
-
-
-/*
- * Perform the forward DCT on an 8x16 sample block.
- *
- * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
-  INT32 z1;
-  DCTELEM workspace[DCTSIZE2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
-
-    tmp10 = tmp0 + tmp3;
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
-    tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
-    dataptr[2] = (DCTELEM)
-      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM)
-      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == DCTSIZE * 2)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by 8/16 = 1/2.
-   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
-
-    tmp10 = tmp0 + tmp7;
-    tmp14 = tmp0 - tmp7;
-    tmp11 = tmp1 + tmp6;
-    tmp15 = tmp1 - tmp6;
-    tmp12 = tmp2 + tmp5;
-    tmp16 = tmp2 - tmp5;
-    tmp13 = tmp3 + tmp4;
-    tmp17 = tmp3 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
-    tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
-    tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
-	      MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
-	      CONST_BITS+PASS1_BITS+1);
-
-    tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
-	    MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
-	      + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
-	      CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
-	      - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
-	      CONST_BITS+PASS1_BITS+1);
-
-    /* Odd part */
-
-    tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
-	    MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
-	    MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
-	    MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
-    tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
-	    MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
-    tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
-	    MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
-    tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
-	    MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
-	    MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
-    tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
-	     - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
-    tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
-	     + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
-    tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
-	     + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 7x14 sample block.
- *
- * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 z1, z2, z3;
-  DCTELEM workspace[8*6];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
-    tmp3 = GETJSAMPLE(elemptr[3]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
-    tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
-
-    z1 = tmp0 + tmp2;
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp3 += tmp3;
-    z1 -= tmp3;
-    z1 -= tmp3;
-    z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
-    z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
-    dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
-    z1 -= z2;
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
-    dataptr[4] = (DCTELEM)
-      DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
-    tmp1 += tmp2;
-    tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
-    tmp0 += tmp3;
-    tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
-
-    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 14)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/7)*(8/14) = 32/49, which we
-   * fold into the constant multipliers:
-   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
-    tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
-
-    tmp10 = tmp0 + tmp6;
-    tmp14 = tmp0 - tmp6;
-    tmp11 = tmp1 + tmp5;
-    tmp15 = tmp1 - tmp5;
-    tmp12 = tmp2 + tmp4;
-    tmp16 = tmp2 - tmp4;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
-    tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
-    tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
-    tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp13 += tmp13;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
-	      MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
-	      MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
-	      CONST_BITS+PASS1_BITS);
-
-    tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
-	      + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
-	      - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp1 + tmp2;
-    tmp11 = tmp5 - tmp4;
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
-		       FIX(0.653061224)),                 /* 32/49 */
-	      CONST_BITS+PASS1_BITS);
-    tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
-    tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
-    tmp10 += tmp11 - tmp3;
-    tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
-	    MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
-	      + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
-	    MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
-	      - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp11 + tmp12 + tmp3
-	      - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
-	      - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 6x12 sample block.
- *
- * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  DCTELEM workspace[8*4];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
-    tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
-    tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
-	      CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
-		    CONST_BITS-PASS1_BITS);
-
-    dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
-    dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
-    dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 12)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/12) = 8/9, which we
-   * fold into the constant multipliers:
-   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
-
-    tmp10 = tmp0 + tmp5;
-    tmp13 = tmp0 - tmp5;
-    tmp11 = tmp1 + tmp4;
-    tmp14 = tmp1 - tmp4;
-    tmp12 = tmp2 + tmp3;
-    tmp15 = tmp2 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
-    tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
-    tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
-    tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
-	      MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
-    tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
-    tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
-    tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
-    tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
-    tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
-	    + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
-    tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
-    tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
-	    + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
-    tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
-	    - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
-    tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
-	    - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 5x10 sample block.
- *
- * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  DCTELEM workspace[8*2];
-  DCTELEM *dataptr;
-  DCTELEM *wsptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  dataptr = data;
-  ctr = 0;
-  for (;;) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
-    tmp2 = GETJSAMPLE(elemptr[2]);
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-
-    tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
-    tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
-    tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
-    tmp10 -= tmp2 << 2;
-    tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
-    dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
-    dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
-	      CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
-	      CONST_BITS-PASS1_BITS);
-
-    ctr++;
-
-    if (ctr != DCTSIZE) {
-      if (ctr == 10)
-	break;			/* Done. */
-      dataptr += DCTSIZE;	/* advance pointer to next row */
-    } else
-      dataptr = workspace;	/* switch pointer to extended workspace */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/5)*(8/10) = 32/25, which we
-   * fold into the constant multipliers:
-   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
-   */
-
-  dataptr = data;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
-    tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
-
-    tmp10 = tmp0 + tmp4;
-    tmp13 = tmp0 - tmp4;
-    tmp11 = tmp1 + tmp3;
-    tmp14 = tmp1 - tmp3;
-
-    tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
-    tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
-    tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 += tmp12;
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
-	      MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
-	      CONST_BITS+PASS1_BITS);
-    tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = tmp0 + tmp4;
-    tmp11 = tmp1 - tmp3;
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
-	      CONST_BITS+PASS1_BITS);
-    tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
-	      MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
-	      MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
-	      MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
-	      CONST_BITS+PASS1_BITS);
-    tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
-	    MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
-    tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
-	    MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-    wsptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 4x8 sample block.
- *
- * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We must also scale the output by 8/4 = 2, which we add here.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
-    tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
-
-    tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
-    tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
-
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "c1" should be "c6".
-     */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
-
-    /* Add fudge factor here for final descale. */
-    tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
-    tmp12 = tmp0 - tmp3;
-    tmp11 = tmp1 + tmp2;
-    tmp13 = tmp1 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
-    tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS+PASS1_BITS);
-
-    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * i0..i3 in the paper are tmp0..tmp3 here.
-     */
-
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS+PASS1_BITS-1);
-
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
-    tmp12 += z1;
-    tmp13 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
-    tmp0 += z1 + tmp12;
-    tmp3 += z1 + tmp13;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
-    tmp1 += z1 + tmp13;
-    tmp2 += z1 + tmp12;
-
-    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 3x6 sample block.
- *
- * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1, tmp2;
-  INT32 tmp10, tmp11, tmp12;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * We scale the results further by 2 as part of output adaption
-   * scaling for different DCT size.
-   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 6; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM)
-      ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
-    dataptr[2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
-	      CONST_BITS-PASS1_BITS-1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We remove the PASS1_BITS scaling, but leave the results scaled up
-   * by an overall factor of 8.
-   * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
-   * fold into the constant multipliers (other part was done in pass 1):
-   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 3; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
-    tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
-
-    tmp10 = tmp0 + tmp2;
-    tmp12 = tmp0 - tmp2;
-
-    tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
-    tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
-    tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
-
-    dataptr[DCTSIZE*0] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*2] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*4] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
-	      CONST_BITS+PASS1_BITS);
-
-    /* Odd part */
-
-    tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
-	      CONST_BITS+PASS1_BITS);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 2x4 sample block.
- *
- * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  INT32 tmp0, tmp1;
-  INT32 tmp10, tmp11;
-  DCTELEM *dataptr;
-  JSAMPROW elemptr;
-  int ctr;
-  SHIFT_TEMPS
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: process rows.
-   * Note results are scaled up by sqrt(8) compared to a true DCT.
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 4; ctr++) {
-    elemptr = sample_data[ctr] + start_col;
-
-    /* Even part */
-
-    tmp0 = GETJSAMPLE(elemptr[0]);
-    tmp1 = GETJSAMPLE(elemptr[1]);
-
-    /* Apply unsigned->signed conversion. */
-    dataptr[0] = (DCTELEM) (tmp0 + tmp1 - 2 * CENTERJSAMPLE);
-
-    /* Odd part */
-
-    dataptr[1] = (DCTELEM) (tmp0 - tmp1);
-
-    dataptr += DCTSIZE;		/* advance pointer to next row */
-  }
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/2)*(8/4) = 2**3.
-   * 4-point FDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
-   */
-
-  dataptr = data;
-  for (ctr = 0; ctr < 2; ctr++) {
-    /* Even part */
-
-    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
-    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
-
-    tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
-    tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
-
-    dataptr[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1) << 3);
-    dataptr[DCTSIZE*2] = (DCTELEM) ((tmp0 - tmp1) << 3);
-
-    /* Odd part */
-
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-3-1);
-
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
-		  CONST_BITS-3);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
-		  CONST_BITS-3);
-
-    dataptr++;			/* advance pointer to next column */
-  }
-}
-
-
-/*
- * Perform the forward DCT on a 1x2 sample block.
- *
- * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
- */
-
-GLOBAL(void)
-jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
-{
-  DCTELEM tmp0, tmp1;
-
-  /* Pre-zero output coefficient block. */
-  MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
-
-  /* Pass 1: empty. */
-
-  /* Pass 2: process columns.
-   * We leave the results scaled up by an overall factor of 8.
-   * We must also scale the output by (8/1)*(8/2) = 2**5.
-   */
-
-  /* Even part */
-
-  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
-  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
-
-  /* Apply unsigned->signed conversion. */
-  data[DCTSIZE*0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
-
-  /* Odd part */
-
-  data[DCTSIZE*1] = (tmp0 - tmp1) << 5;
-}
-
-#endif /* DCT_SCALING_SUPPORTED */
-#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jidctflt.c b/cmake/externals/libjpeg/jidctflt.c
deleted file mode 100644
index e33a2b5e4..000000000
--- a/cmake/externals/libjpeg/jidctflt.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- * jidctflt.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2010-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a floating-point implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * This implementation should be more accurate than either of the integer
- * IDCT implementations.  However, it may not give the same results on all
- * machines because of differences in roundoff behavior.  Speed will depend
- * on the hardware's floating point capacity.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with a fixed-point
- * implementation, accuracy is lost due to imprecise representation of the
- * scaled quantization values.  However, that problem does not arise if
- * we use floating point arithmetic.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_FLOAT_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce a float result.
- */
-
-#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
-  FAST_FLOAT z5, z10, z11, z12, z13;
-  JCOEFPTR inptr;
-  FLOAT_MULT_TYPE * quantptr;
-  FAST_FLOAT * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z13 = tmp6 + tmp5;		/* phase 6 */
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    wsptr[DCTSIZE*0] = tmp0 + tmp7;
-    wsptr[DCTSIZE*7] = tmp0 - tmp7;
-    wsptr[DCTSIZE*1] = tmp1 + tmp6;
-    wsptr[DCTSIZE*6] = tmp1 - tmp6;
-    wsptr[DCTSIZE*2] = tmp2 + tmp5;
-    wsptr[DCTSIZE*5] = tmp2 - tmp5;
-    wsptr[DCTSIZE*3] = tmp3 + tmp4;
-    wsptr[DCTSIZE*4] = tmp3 - tmp4;
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * And testing floats for zero is relatively expensive, so we don't bother.
-     */
-
-    /* Even part */
-
-    /* Prepare range-limit and float->int conversion */
-    z5 = wsptr[0] + (((FAST_FLOAT) RANGE_CENTER) + ((FAST_FLOAT) 0.5));
-    tmp10 = z5 + wsptr[4];
-    tmp11 = z5 - wsptr[4];
-
-    tmp13 = wsptr[2] + wsptr[6];
-    tmp12 = (wsptr[2] - wsptr[6]) *
-	      ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z13 = wsptr[5] + wsptr[3];
-    z10 = wsptr[5] - wsptr[3];
-    z11 = wsptr[1] + wsptr[7];
-    z12 = wsptr[1] - wsptr[7];
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
-
-    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    /* Final output stage: float->int conversion and range-limit */
-
-    outptr[0] = range_limit[(int) (tmp0 + tmp7) & RANGE_MASK];
-    outptr[7] = range_limit[(int) (tmp0 - tmp7) & RANGE_MASK];
-    outptr[1] = range_limit[(int) (tmp1 + tmp6) & RANGE_MASK];
-    outptr[6] = range_limit[(int) (tmp1 - tmp6) & RANGE_MASK];
-    outptr[2] = range_limit[(int) (tmp2 + tmp5) & RANGE_MASK];
-    outptr[5] = range_limit[(int) (tmp2 - tmp5) & RANGE_MASK];
-    outptr[3] = range_limit[(int) (tmp3 + tmp4) & RANGE_MASK];
-    outptr[4] = range_limit[(int) (tmp3 - tmp4) & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jidctfst.c b/cmake/externals/libjpeg/jidctfst.c
deleted file mode 100644
index 1ac3e39cb..000000000
--- a/cmake/externals/libjpeg/jidctfst.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * jidctfst.c
- *
- * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2015-2017 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a fast, not so accurate integer implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on Arai, Agui, and Nakajima's algorithm for
- * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
- * Japanese, but the algorithm is described in the Pennebaker & Mitchell
- * JPEG textbook (see REFERENCES section in file README).  The following code
- * is based directly on figure 4-8 in P&M.
- * While an 8-point DCT cannot be done in less than 11 multiplies, it is
- * possible to arrange the computation so that many of the multiplies are
- * simple scalings of the final outputs.  These multiplies can then be
- * folded into the multiplications or divisions by the JPEG quantization
- * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
- * to be done in the DCT itself.
- * The primary disadvantage of this method is that with fixed-point math,
- * accuracy is lost due to imprecise representation of the scaled
- * quantization values.  The smaller the quantization table entry, the less
- * precise the scaled value, so this implementation does worse with high-
- * quality-setting files than with low-quality ones.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_IFAST_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/* Scaling decisions are generally the same as in the LL&M algorithm;
- * see jidctint.c for more details.  However, we choose to descale
- * (right shift) multiplication products as soon as they are formed,
- * rather than carrying additional fractional bits into subsequent additions.
- * This compromises accuracy slightly, but it lets us save a few shifts.
- * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
- * everywhere except in the multiplications proper; this saves a good deal
- * of work on 16-bit-int machines.
- *
- * The dequantized coefficients are not integers because the AA&N scaling
- * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
- * so that the first and second IDCT rounds have the same input scaling.
- * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
- * avoid a descaling shift; this compromises accuracy rather drastically
- * for small quantization table entries, but it saves a lot of shifts.
- * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
- * so we use a much larger scaling factor to preserve accuracy.
- *
- * A final compromise is to represent the multiplicative constants to only
- * 8 fractional bits, rather than 13.  This saves some shifting work on some
- * machines, and may also reduce the cost of multiplication (since there
- * are fewer one-bits in the constants).
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  8
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  8
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 8
-#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
-#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
-#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
-#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
-#else
-#define FIX_1_082392200  FIX(1.082392200)
-#define FIX_1_414213562  FIX(1.414213562)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_2_613125930  FIX(2.613125930)
-#endif
-
-
-/* We can gain a little more speed, with a further compromise in accuracy,
- * by omitting the addition in a descaling shift.  This yields an incorrectly
- * rounded result half the time...
- */
-
-#ifndef USE_ACCURATE_ROUNDING
-#undef DESCALE
-#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
-#endif
-
-
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
- * descale to yield a DCTELEM result.
- */
-
-#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
- * multiplication will do.  For 12-bit data, the multiplier table is
- * declared INT32, so a 32-bit multiply will be used.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
-#else
-#define DEQUANTIZE(coef,quantval)  \
-	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
-#endif
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- *
- * cK represents cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  DCTELEM tmp10, tmp11, tmp12, tmp13;
-  DCTELEM z5, z10, z11, z12, z13;
-  JCOEFPTR inptr;
-  IFAST_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS			/* for DESCALE */
-  ISHIFT_TEMPS			/* for IRIGHT_SHIFT */
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-    
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-    
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
-    tmp11 = tmp0 - tmp2;
-
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
-    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-    
-    /* Odd part */
-
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z13 = tmp6 + tmp5;		/* phase 6 */
-    z10 = tmp6 - tmp5;
-    z11 = tmp4 + tmp7;
-    z12 = tmp4 - tmp7;
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-    tmp10 = z5 - MULTIPLY(z12, FIX_1_082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - MULTIPLY(z10, FIX_2_613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
-    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-    wsptr[DCTSIZE*3] = (int) (tmp3 + tmp4);
-    wsptr[DCTSIZE*4] = (int) (tmp3 - tmp4);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-  
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z5 = (DCTELEM) wsptr[0] +
-	   ((((DCTELEM) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (1 << (PASS1_BITS+2)));
-
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * On machines with very fast multiplication, it's possible that the
-     * test takes more time than it's worth.  In that case this section
-     * may be commented out.
-     */
-    
-#ifndef NO_ZERO_ROW_TEST
-    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
-      /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) IRIGHT_SHIFT(z5, PASS1_BITS+3)
-				  & RANGE_MASK];
-      
-      outptr[0] = dcval;
-      outptr[1] = dcval;
-      outptr[2] = dcval;
-      outptr[3] = dcval;
-      outptr[4] = dcval;
-      outptr[5] = dcval;
-      outptr[6] = dcval;
-      outptr[7] = dcval;
-
-      wsptr += DCTSIZE;		/* advance pointer to next row */
-      continue;
-    }
-#endif
-    
-    /* Even part */
-
-    tmp10 = z5 + (DCTELEM) wsptr[4];
-    tmp11 = z5 - (DCTELEM) wsptr[4];
-
-    tmp13 = (DCTELEM) wsptr[2] + (DCTELEM) wsptr[6];
-    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6],
-		     FIX_1_414213562) - tmp13; /* 2*c4 */
-
-    tmp0 = tmp10 + tmp13;
-    tmp3 = tmp10 - tmp13;
-    tmp1 = tmp11 + tmp12;
-    tmp2 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
-
-    tmp7 = z11 + z13;		/* phase 5 */
-    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
-
-    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
-    tmp10 = z5 - MULTIPLY(z12, FIX_1_082392200); /* 2*(c2-c6) */
-    tmp12 = z5 - MULTIPLY(z10, FIX_2_613125930); /* 2*(c2+c6) */
-
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
-    tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 - tmp5;
-
-    /* Final output stage: scale down by a factor of 8 and range-limit */
-
-    outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp1 + tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) IRIGHT_SHIFT(tmp1 - tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) IRIGHT_SHIFT(tmp3 + tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) IRIGHT_SHIFT(tmp3 - tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#endif /* DCT_IFAST_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jidctint.c b/cmake/externals/libjpeg/jidctint.c
deleted file mode 100644
index e30ec8c8a..000000000
--- a/cmake/externals/libjpeg/jidctint.c
+++ /dev/null
@@ -1,5240 +0,0 @@
-/*
- * jidctint.c
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2018 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
- * must also perform dequantization of the input coefficients.
- *
- * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
- * on each row (or vice versa, but it's more convenient to emit a row at
- * a time).  Direct algorithms are also available, but they are much more
- * complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- *
- * We also provide IDCT routines with various output sample block sizes for
- * direct resolution reduction or enlargement and for direct resolving the
- * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
- * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
- *
- * For N<8 we simply take the corresponding low-frequency coefficients of
- * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
- * to yield the downscaled outputs.
- * This can be seen as direct low-pass downsampling from the DCT domain
- * point of view rather than the usual spatial domain point of view,
- * yielding significant computational savings and results at least
- * as good as common bilinear (averaging) spatial downsampling.
- *
- * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
- * lower frequencies and higher frequencies assumed to be zero.
- * It turns out that the computational effort is similar to the 8x8 IDCT
- * regarding the output size.
- * Furthermore, the scaling and descaling is the same for all IDCT sizes.
- *
- * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
- * since there would be too many additional constants to pre-calculate.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
-
-#ifdef DCT_ISLOW_SUPPORTED
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
- *
- * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
- * larger than the true IDCT outputs.  The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm.  The advantage of
- * this arrangement is that we save two multiplications per 1-D IDCT,
- * because the y0 and y4 inputs need not be divided by sqrt(N).
- *
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic.  We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants).  After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output.  This division can be done
- * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
- *
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision.  These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling.  (To scale up 12-bit sample data further, an
- * intermediate INT32 array would be needed.)
- *
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
- * shows that the values given below are the most effective.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS  13
-#define PASS1_BITS  2
-#else
-#define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
-#else
-#define FIX_0_298631336  FIX(0.298631336)
-#define FIX_0_390180644  FIX(0.390180644)
-#define FIX_0_541196100  FIX(0.541196100)
-#define FIX_0_765366865  FIX(0.765366865)
-#define FIX_0_899976223  FIX(0.899976223)
-#define FIX_1_175875602  FIX(1.175875602)
-#define FIX_1_501321110  FIX(1.501321110)
-#define FIX_1_847759065  FIX(1.847759065)
-#define FIX_1_961570560  FIX(1.961570560)
-#define FIX_2_053119869  FIX(2.053119869)
-#define FIX_2_562915447  FIX(2.562915447)
-#define FIX_3_072711026  FIX(3.072711026)
-#endif
-
-
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const)  ((var) * (const))
-#endif
-
-
-/* Dequantize a coefficient by multiplying it by the multiplier-table
- * entry; produce an int result.  In this module, both inputs and result
- * are 16 bits or less, so either int or short multiply will work.
- */
-
-#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/16).
- */
-
-GLOBAL(void)
-jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * Note results are scaled up by sqrt(8) compared to a true IDCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < DCTSIZE; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z2 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-
-    /* Rows of zeroes can be exploited in the same way as we did with columns.
-     * However, the column calculation has created many nonzero AC terms, so
-     * the simplification applies less often (typically 5% to 10% of the time).
-     * On machines with very fast multiplication, it's possible that the
-     * test takes more time than it's worth.  In that case this section
-     * may be commented out.
-     */
-
-#ifndef NO_ZERO_ROW_TEST
-    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
-      /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
-				  & RANGE_MASK];
-
-      outptr[0] = dcval;
-      outptr[1] = dcval;
-      outptr[2] = dcval;
-      outptr[3] = dcval;
-      outptr[4] = dcval;
-      outptr[5] = dcval;
-      outptr[6] = dcval;
-      outptr[7] = dcval;
-
-      wsptr += DCTSIZE;		/* advance pointer to next row */
-      continue;
-    }
-#endif
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-#ifdef IDCT_SCALING_SUPPORTED
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 7x7 output block.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/14).
- */
-
-GLOBAL(void)
-jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[7*7];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp13 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
-    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp0 = z1 + z3;
-    z2 -= tmp0;
-    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
-    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
-    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
-    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-
-    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
-    tmp1 += tmp2;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
-    tmp0 += z2;
-    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 7 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp13 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp13 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
-    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp0 = z1 + z3;
-    z2 -= tmp0;
-    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
-    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
-    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
-    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-
-    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
-    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
-    tmp0 = tmp1 - tmp2;
-    tmp1 += tmp2;
-    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
-    tmp1 += tmp2;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
-    tmp0 += z2;
-    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 7;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 6x6 output block.
- *
- * Optimized algorithm with 3 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/12).
- */
-
-GLOBAL(void)
-jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*1] = (int) (tmp11 + tmp1);
-    wsptr[6*4] = (int) (tmp11 - tmp1);
-    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[4];
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = tmp0 - tmp10 - tmp10;
-    tmp10 = (INT32) wsptr[2];
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 5x5 output block.
- *
- * Optimized algorithm with 5 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/10).
- */
-
-GLOBAL(void)
-jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[5*5];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp12 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
-    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
-    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
-
-    /* Final output stage */
-
-    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 5 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp12 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp12 <<= CONST_BITS;
-    tmp0 = (INT32) wsptr[2];
-    tmp1 = (INT32) wsptr[4];
-    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
-    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
-    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 5;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 4x4 output block.
- *
- * Optimized algorithm with 3 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
- */
-
-GLOBAL(void)
-jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[4*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    
-    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
-    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
-		       CONST_BITS-PASS1_BITS);
-    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
-		       CONST_BITS-PASS1_BITS);
-
-    /* Final output stage */
-
-    wsptr[4*0] = (int) (tmp10 + tmp0);
-    wsptr[4*3] = (int) (tmp10 - tmp0);
-    wsptr[4*1] = (int) (tmp12 + tmp2);
-    wsptr[4*2] = (int) (tmp12 - tmp2);
-  }
-
-  /* Pass 2: process 4 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp2 = (INT32) wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 3x3 output block.
- *
- * Optimized algorithm with 2 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/6).
- */
-
-GLOBAL(void)
-jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[3*3];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 3 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[2];
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = (INT32) wsptr[1];
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 3;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 2x2 output block.
- *
- * Multiplication-less algorithm.
- */
-
-GLOBAL(void)
-jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* Pass 1: process columns from input. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  /* Column 0 */
-  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
-  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
-  /* Add range center and fudge factor for final descale and range-limit. */
-  tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  tmp0 = tmp4 + tmp5;
-  tmp2 = tmp4 - tmp5;
-
-  /* Column 1 */
-  tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
-  tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
-
-  tmp1 = tmp4 + tmp5;
-  tmp3 = tmp4 - tmp5;
-
-  /* Pass 2: process 2 rows, store into output array. */
-
-  /* Row 0 */
-  outptr = output_buf[0] + output_col;
-
-  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
-
-  /* Row 1 */
-  outptr = output_buf[1] + output_col;
-
-  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a reduced-size 1x1 output block.
- *
- * We hardly need an inverse DCT routine for this: just take the
- * average pixel value, which is one-eighth of the DC coefficient.
- */
-
-GLOBAL(void)
-jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM dcval;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* 1x1 is trivial: just take the DC coefficient divided by 8. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
-  /* Add range center and fudge factor for descale and range-limit. */
-  dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  output_buf[0][output_col] =
-    range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 9x9 output block.
- *
- * Optimized algorithm with 10 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/18).
- */
-
-GLOBAL(void)
-jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*9];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
-    tmp1 = tmp0 + tmp3;
-    tmp2 = tmp0 - tmp3 - tmp3;
-
-    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
-    tmp11 = tmp2 + tmp0;
-    tmp14 = tmp2 - tmp0 - tmp0;
-
-    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
-    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
-    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
-
-    tmp10 = tmp1 + tmp0 - tmp3;
-    tmp12 = tmp1 - tmp0 + tmp2;
-    tmp13 = tmp1 - tmp2 + tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
-
-    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
-    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
-    tmp0 = tmp2 + tmp3 - z2;
-    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
-    tmp2 += z2 - tmp1;
-    tmp3 += z2 + tmp1;
-    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 9 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 9; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
-    tmp1 = tmp0 + tmp3;
-    tmp2 = tmp0 - tmp3 - tmp3;
-
-    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
-    tmp11 = tmp2 + tmp0;
-    tmp14 = tmp2 - tmp0 - tmp0;
-
-    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
-    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
-    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
-
-    tmp10 = tmp1 + tmp0 - tmp3;
-    tmp12 = tmp1 - tmp0 + tmp2;
-    tmp13 = tmp1 - tmp2 + tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
-
-    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
-    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
-    tmp0 = tmp2 + tmp3 - z2;
-    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
-    tmp2 += z2 - tmp1;
-    tmp3 += z2 + tmp1;
-    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 10x10 output block.
- *
- * Optimized algorithm with 12 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/20).
- */
-
-GLOBAL(void)
-jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4, z5;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*10];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-    z5 = z3 << CONST_BITS;
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z5 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) (tmp22 + tmp12);
-    wsptr[8*7] = (int) (tmp22 - tmp12);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 10 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 10; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z3 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing an 11x11 output block.
- *
- * Optimized algorithm with 24 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/22).
- */
-
-GLOBAL(void)
-jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*11];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp10 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
-    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
-    z4 = z1 + z3;
-    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
-    z4 -= z2;
-    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
-    tmp21 = tmp20 + tmp23 + tmp25 -
-	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
-    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
-    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
-    tmp24 += tmp25;
-    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
-    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
-	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
-    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z2;
-    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
-    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
-    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
-    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
-    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
-    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
-    tmp11 += z1;
-    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
-    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
-	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
-	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 11 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 11; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp10 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp10 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
-    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
-    z4 = z1 + z3;
-    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
-    z4 -= z2;
-    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
-    tmp21 = tmp20 + tmp23 + tmp25 -
-	    MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
-    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
-    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
-    tmp24 += tmp25;
-    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
-    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
-	     MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
-    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z2;
-    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
-    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
-    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
-    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
-    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
-    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
-    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
-    tmp11 += z1;
-    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
-    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
-	     MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
-	     MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 12x12 output block.
- *
- * Optimized algorithm with 15 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/24).
- */
-
-GLOBAL(void)
-jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*12];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 12 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 12; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-
-    z4 = (INT32) wsptr[4];
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = (INT32) wsptr[2];
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = (INT32) wsptr[6];
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 13x13 output block.
- *
- * Optimized algorithm with 29 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/26).
- */
-
-GLOBAL(void)
-jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*13];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
-
-    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
-    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
-
-    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
-    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
-
-    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
-    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
-
-    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
-    tmp15 = z1 + z4;
-    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
-    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
-    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
-    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
-    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
-    tmp11 += tmp14;
-    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
-    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
-    tmp12 += tmp14;
-    tmp13 += tmp14;
-    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
-    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
-	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
-    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
-    tmp14 += z1;
-    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
-	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 13 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 13; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
-
-    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
-    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
-
-    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
-    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
-
-    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
-    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
-
-    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
-    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
-
-    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
-    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
-    tmp15 = z1 + z4;
-    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
-    tmp10 = tmp11 + tmp12 + tmp13 -
-	    MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
-    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
-    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
-    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
-    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
-    tmp11 += tmp14;
-    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
-    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
-    tmp12 += tmp14;
-    tmp13 += tmp14;
-    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
-    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
-	    MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
-    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
-    tmp14 += z1;
-    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
-	     MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 14x14 output block.
- *
- * Optimized algorithm with 20 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/28).
- */
-
-GLOBAL(void)
-jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*14];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp13 = z4 << CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
-    tmp16 += tmp15;
-    z1    += z4;
-    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
-    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
-    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
-    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
-
-    tmp13 = (z1 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) (tmp23 + tmp13);
-    wsptr[8*10] = (int) (tmp23 - tmp13);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 14 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 14; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-    z4 <<= CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
-    tmp16 += tmp15;
-    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
-    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
-    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
-    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
-
-    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 15x15 output block.
- *
- * Optimized algorithm with 22 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/30).
- */
-
-GLOBAL(void)
-jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*15];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
-    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
-
-    tmp12 = z1 - tmp10;
-    tmp13 = z1 + tmp11;
-    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
-
-    z4 = z2 - z3;
-    z3 += z2;
-    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
-    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
-
-    tmp20 = tmp13 + tmp10 + tmp11;
-    tmp23 = tmp12 - tmp10 + tmp11 + z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
-
-    tmp25 = tmp13 - tmp10 - tmp11;
-    tmp26 = tmp12 + tmp10 - tmp11 - z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
-
-    tmp21 = tmp12 + tmp10 + tmp11;
-    tmp24 = tmp13 - tmp10 + tmp11;
-    tmp11 += tmp11;
-    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
-    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp13 = z2 - z4;
-    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
-    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
-    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
-
-    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
-    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
-    z2 = z1 - z4;
-    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
-
-    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
-    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
-    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
-    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
-    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
-    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 15 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 15; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
-
-    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
-    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
-
-    tmp12 = z1 - tmp10;
-    tmp13 = z1 + tmp11;
-    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
-
-    z4 = z2 - z3;
-    z3 += z2;
-    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
-    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
-
-    tmp20 = tmp13 + tmp10 + tmp11;
-    tmp23 = tmp12 - tmp10 + tmp11 + z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
-
-    tmp25 = tmp13 - tmp10 - tmp11;
-    tmp26 = tmp12 + tmp10 - tmp11 - z2;
-
-    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
-    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
-
-    tmp21 = tmp12 + tmp10 + tmp11;
-    tmp24 = tmp13 - tmp10 + tmp11;
-    tmp11 += tmp11;
-    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
-    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z4 = (INT32) wsptr[5];
-    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
-    z4 = (INT32) wsptr[7];
-
-    tmp13 = z2 - z4;
-    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
-    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
-    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
-
-    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
-    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
-    z2 = z1 - z4;
-    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
-
-    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
-    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
-    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
-    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
-    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
-    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 16x16 output block.
- *
- * Optimized algorithm with 28 multiplications in the 1-D kernel.
- * cK represents sqrt(2) * cos(K*pi/32).
- */
-
-GLOBAL(void)
-jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*16];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 16 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 16; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[4];
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 16x8 output block.
- *
- * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*8];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * Note results are scaled up by sqrt(8) compared to a true IDCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = DCTSIZE; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[DCTSIZE*0] = dcval;
-      wsptr[DCTSIZE*1] = dcval;
-      wsptr[DCTSIZE*2] = dcval;
-      wsptr[DCTSIZE*3] = dcval;
-      wsptr[DCTSIZE*4] = dcval;
-      wsptr[DCTSIZE*5] = dcval;
-      wsptr[DCTSIZE*6] = dcval;
-      wsptr[DCTSIZE*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process 8 rows from work array, store into output array.
-   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[4];
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 14x7 output block.
- *
- * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*7];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp23 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
-    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
-    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp10 = z1 + z3;
-    z2 -= tmp10;
-    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
-    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
-    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
-    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
-    tmp10 = tmp11 - tmp12;
-    tmp11 += tmp12;
-    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
-    tmp11 += tmp12;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
-    tmp10 += z2;
-    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 7 rows from work array, store into output array.
-   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z1 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z1 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-    z4 <<= CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
-    tmp16 += tmp15;
-    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
-    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
-    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
-    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
-
-    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 12x6 output block.
- *
- * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp10 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
-    tmp11 = tmp10 + tmp20;
-    tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
-    tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
-    tmp20 = tmp11 + tmp10;
-    tmp22 = tmp11 - tmp10;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
-    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
-    tmp11 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) (tmp21 + tmp11);
-    wsptr[8*4] = (int) (tmp21 - tmp11);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array.
-   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-
-    z4 = (INT32) wsptr[4];
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = (INT32) wsptr[2];
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = (INT32) wsptr[6];
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
-					       CONST_BITS+PASS1_BITS+3)
-			     & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 10x5 output block.
- *
- * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*5];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp12 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
-    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
-    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 5 rows from work array, store into output array.
-   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z3 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[4];
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z3 <<= CONST_BITS;
-    z4 = (INT32) wsptr[7];
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z3 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 8;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing an 8x4 output block.
- *
- * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-
-    tmp10 = (tmp0 + tmp2) << PASS1_BITS;
-    tmp12 = (tmp0 - tmp2) << PASS1_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
-		       CONST_BITS-PASS1_BITS);
-    tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
-		       CONST_BITS-PASS1_BITS);
-
-    /* Final output stage */
-
-    wsptr[8*0] = (int) (tmp10 + tmp0);
-    wsptr[8*3] = (int) (tmp10 - tmp0);
-    wsptr[8*1] = (int) (tmp12 + tmp2);
-    wsptr[8*2] = (int) (tmp12 - tmp2);
-  }
-
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z2 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 6x3 output block.
- *
- * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*3];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
-  }
-  
-  /* Pass 2: process 3 rows from work array, store into output array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[4];
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = tmp0 - tmp10 - tmp10;
-    tmp10 = (INT32) wsptr[2];
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 4x2 output block.
- *
- * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  INT32 * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  INT32 workspace[4*2];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array. */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-
-    /* Odd part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    /* Final output stage */
-
-    wsptr[4*0] = tmp10 + tmp0;
-    wsptr[4*1] = tmp10 - tmp0;
-  }
-
-  /* Pass 2: process 2 rows from work array, store into output array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 2; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
-    tmp2 = wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = wsptr[1];
-    z3 = wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 2x1 output block.
- *
- * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* Pass 1: empty. */
-
-  /* Pass 2: process 1 row from input, store into output array. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  outptr = output_buf[0] + output_col;
-
-  /* Even part */
-
-  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
-  /* Add range center and fudge factor for final descale and range-limit. */
-  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  /* Odd part */
-
-  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
-
-  /* Final output stage */
-
-  outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing an 8x16 output block.
- *
- * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[8*16];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
-    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
-
-    tmp10 = tmp0 + tmp1;
-    tmp11 = tmp0 - tmp1;
-    tmp12 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z3 = z1 - z2;
-    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
-    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
-
-    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
-    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
-    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
-    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
-
-    tmp20 = tmp10 + tmp0;
-    tmp27 = tmp10 - tmp0;
-    tmp21 = tmp12 + tmp1;
-    tmp26 = tmp12 - tmp1;
-    tmp22 = tmp13 + tmp2;
-    tmp25 = tmp13 - tmp2;
-    tmp23 = tmp11 + tmp3;
-    tmp24 = tmp11 - tmp3;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z1 + z3;
-
-    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
-    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
-    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
-    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
-    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
-    tmp0  = tmp1 + tmp2 + tmp3 -
-	    MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
-    tmp13 = tmp10 + tmp11 + tmp12 -
-	    MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
-    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
-    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
-    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
-    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
-    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
-    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
-    z2    += z4;
-    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
-    tmp1  += z1;
-    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
-    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
-    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
-    tmp12 += z2;
-    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
-    tmp2  += z2;
-    tmp3  += z2;
-    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
-    tmp10 += z2;
-    tmp11 += z2;
-
-    /* Final output stage */
-
-    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
-    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
-    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
-    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
-    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process rows from work array, store into output array.
-   * Note that we must descale the results by a factor of 8 == 2**3,
-   * and also undo the PASS1_BITS scaling.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 16; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    z2 = (INT32) wsptr[0] +
-	   ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	    (ONE << (PASS1_BITS+2)));
-    z3 = (INT32) wsptr[4];
-
-    tmp0 = (z2 + z3) << CONST_BITS;
-    tmp1 = (z2 - z3) << CONST_BITS;
-
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += DCTSIZE;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 7x14 output block.
- *
- * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[7*14];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z1 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
-    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
-    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
-
-    tmp10 = z1 + z2;
-    tmp11 = z1 + z3;
-    tmp12 = z1 - z4;
-
-    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
-
-    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
-    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
-    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
-	    MULTIPLY(z2, FIX(1.378756276));      /* c2 */
-
-    tmp20 = tmp10 + tmp13;
-    tmp26 = tmp10 - tmp13;
-    tmp21 = tmp11 + tmp14;
-    tmp25 = tmp11 - tmp14;
-    tmp22 = tmp12 + tmp15;
-    tmp24 = tmp12 - tmp15;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp13 = z4 << CONST_BITS;
-
-    tmp14 = z1 + z3;
-    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
-    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
-    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
-    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
-    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
-    z1    -= z2;
-    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
-    tmp16 += tmp15;
-    z1    += z4;
-    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
-    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
-    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
-    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
-    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
-    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
-
-    tmp13 = (z1 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[7*3]  = (int) (tmp23 + tmp13);
-    wsptr[7*10] = (int) (tmp23 - tmp13);
-    wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
-    wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 14 rows from work array, store into output array.
-   * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 14; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp23 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp23 <<= CONST_BITS;
-
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
-
-    tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
-    tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
-    tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
-    tmp10 = z1 + z3;
-    z2 -= tmp10;
-    tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
-    tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
-    tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
-    tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-
-    tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
-    tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
-    tmp10 = tmp11 - tmp12;
-    tmp11 += tmp12;
-    tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
-    tmp11 += tmp12;
-    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
-    tmp10 += z2;
-    tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 7;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 6x12 output block.
- *
- * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[6*12];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
-
-    tmp10 = z3 + z4;
-    tmp11 = z3 - z4;
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
-    z1 <<= CONST_BITS;
-    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    z2 <<= CONST_BITS;
-
-    tmp12 = z1 - z2;
-
-    tmp21 = z3 + tmp12;
-    tmp24 = z3 - tmp12;
-
-    tmp12 = z4 + z2;
-
-    tmp20 = tmp10 + tmp12;
-    tmp25 = tmp10 - tmp12;
-
-    tmp12 = z4 - z1 - z2;
-
-    tmp22 = tmp11 + tmp12;
-    tmp23 = tmp11 - tmp12;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
-    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
-
-    tmp10 = z1 + z3;
-    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
-    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
-    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
-    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
-    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
-    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
-    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
-	     MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
-
-    z1 -= z4;
-    z2 -= z3;
-    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
-    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
-    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
-
-    /* Final output stage */
-
-    wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
-    wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
-    wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 12 rows from work array, store into output array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 12; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp10 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp10 <<= CONST_BITS;
-    tmp12 = (INT32) wsptr[4];
-    tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
-    tmp11 = tmp10 + tmp20;
-    tmp21 = tmp10 - tmp20 - tmp20;
-    tmp20 = (INT32) wsptr[2];
-    tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
-    tmp20 = tmp11 + tmp10;
-    tmp22 = tmp11 - tmp10;
-
-    /* Odd part */
-
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
-    tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
-    tmp11 = (z1 - z2 - z3) << CONST_BITS;
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 6;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 5x10 output block.
- *
- * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		JCOEFPTR coef_block,
-		JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4, z5;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[5*10];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
-    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z2;
-
-    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
-			CONST_BITS-PASS1_BITS);
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
-    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
-    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
-
-    tmp20 = tmp10 + tmp12;
-    tmp24 = tmp10 - tmp12;
-    tmp21 = tmp11 + tmp13;
-    tmp23 = tmp11 - tmp13;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-
-    tmp11 = z2 + z4;
-    tmp13 = z2 - z4;
-
-    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
-    z5 = z3 << CONST_BITS;
-
-    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
-    z4 = z5 + tmp12;
-
-    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
-    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
-
-    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
-    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
-
-    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
-
-    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
-    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
-
-    /* Final output stage */
-
-    wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
-    wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
-    wsptr[5*2] = (int) (tmp22 + tmp12);
-    wsptr[5*7] = (int) (tmp22 - tmp12);
-    wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
-    wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
-    wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 10 rows from work array, store into output array.
-   * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 10; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp12 = (INT32) wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	       (ONE << (PASS1_BITS+2)));
-    tmp12 <<= CONST_BITS;
-    tmp13 = (INT32) wsptr[2];
-    tmp14 = (INT32) wsptr[4];
-    z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
-    z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
-    z3 = tmp12 + z2;
-    tmp10 = z3 + z1;
-    tmp11 = z3 - z1;
-    tmp12 -= z2 << 2;
-
-    /* Odd part */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
-    tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
-    tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 5;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 4x8 output block.
- *
- * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[4*8];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * Note results are scaled up by sqrt(8) compared to a true IDCT;
-   * furthermore, we scale the results by 2**PASS1_BITS.
-   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 4; ctr > 0; ctr--) {
-    /* Due to quantization, we will usually find that many of the input
-     * coefficients are zero, especially the AC terms.  We can exploit this
-     * by short-circuiting the IDCT calculation for any column in which all
-     * the AC terms are zero.  In that case each output is equal to the
-     * DC coefficient (with scale factor as needed).
-     * With typical images and quantization tables, half or more of the
-     * column DCT calculations can be simplified this way.
-     */
-
-    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
-      /* AC terms all zero */
-      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-
-      wsptr[4*0] = dcval;
-      wsptr[4*1] = dcval;
-      wsptr[4*2] = dcval;
-      wsptr[4*3] = dcval;
-      wsptr[4*4] = dcval;
-      wsptr[4*5] = dcval;
-      wsptr[4*6] = dcval;
-      wsptr[4*7] = dcval;
-
-      inptr++;			/* advance pointers to next column */
-      quantptr++;
-      wsptr++;
-      continue;
-    }
-
-    /* Even part: reverse the even part of the forward DCT.
-     * The rotator is c(-6).
-     */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    z2 <<= CONST_BITS;
-    z3 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    z2 += ONE << (CONST_BITS-PASS1_BITS-1);
-
-    tmp0 = z2 + z3;
-    tmp1 = z2 - z3;
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
-
-    tmp10 = tmp0 + tmp2;
-    tmp13 = tmp0 - tmp2;
-    tmp11 = tmp1 + tmp3;
-    tmp12 = tmp1 - tmp3;
-
-    /* Odd part per figure 8; the matrix is unitary and hence its
-     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
-     */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-    z2 = tmp0 + tmp2;
-    z3 = tmp1 + tmp3;
-
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
-    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
-    z2 += z1;
-    z3 += z1;
-
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
-    tmp0 += z1 + z2;
-    tmp3 += z1 + z3;
-
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
-    tmp1 += z1 + z3;
-    tmp2 += z1 + z2;
-
-    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-
-    wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
-    wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
-    wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-
-    inptr++;			/* advance pointers to next column */
-    quantptr++;
-    wsptr++;
-  }
-
-  /* Pass 2: process 8 rows from work array, store into output array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 8; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp2 = (INT32) wsptr[2];
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 4;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 3x6 output block.
- *
- * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  int * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  int workspace[3*6];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp0 <<= CONST_BITS;
-    /* Add fudge factor here for final descale. */
-    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
-    tmp1 = tmp0 + tmp10;
-    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
-    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
-    tmp10 = tmp1 + tmp0;
-    tmp12 = tmp1 - tmp0;
-
-    /* Odd part */
-
-    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
-    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
-    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
-    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
-
-    /* Final output stage */
-
-    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
-    wsptr[3*1] = (int) (tmp11 + tmp1);
-    wsptr[3*4] = (int) (tmp11 - tmp1);
-    wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
-    wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
-  }
-
-  /* Pass 2: process 6 rows from work array, store into output array.
-   * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
-   */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 6; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp0 = (INT32) wsptr[0] +
-	     ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
-	      (ONE << (PASS1_BITS+2)));
-    tmp0 <<= CONST_BITS;
-    tmp2 = (INT32) wsptr[2];
-    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
-    tmp10 = tmp0 + tmp12;
-    tmp2 = tmp0 - tmp12 - tmp12;
-
-    /* Odd part */
-
-    tmp12 = (INT32) wsptr[1];
-    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
-					      CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 3;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 2x4 output block.
- *
- * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3;
-  JCOEFPTR inptr;
-  ISLOW_MULT_TYPE * quantptr;
-  INT32 * wsptr;
-  JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  int ctr;
-  INT32 workspace[2*4];	/* buffers data between passes */
-  SHIFT_TEMPS
-
-  /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel,
-   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
-   */
-
-  inptr = coef_block;
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-  wsptr = workspace;
-  for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
-    /* Even part */
-
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-
-    tmp10 = (tmp0 + tmp2) << CONST_BITS;
-    tmp12 = (tmp0 - tmp2) << CONST_BITS;
-
-    /* Odd part */
-    /* Same rotation as in the even part of the 8x8 LL&M IDCT */
-
-    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
-    tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
-    tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
-
-    /* Final output stage */
-
-    wsptr[2*0] = tmp10 + tmp0;
-    wsptr[2*3] = tmp10 - tmp0;
-    wsptr[2*1] = tmp12 + tmp2;
-    wsptr[2*2] = tmp12 - tmp2;
-  }
-
-  /* Pass 2: process 4 rows from work array, store into output array. */
-
-  wsptr = workspace;
-  for (ctr = 0; ctr < 4; ctr++) {
-    outptr = output_buf[ctr] + output_col;
-
-    /* Even part */
-
-    /* Add range center and fudge factor for final descale and range-limit. */
-    tmp10 = wsptr[0] +
-	      ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
-	       (ONE << (CONST_BITS+2)));
-
-    /* Odd part */
-
-    tmp0 = wsptr[1];
-
-    /* Final output stage */
-
-    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
-			    & RANGE_MASK];
-
-    wsptr += 2;		/* advance pointer to next row */
-  }
-}
-
-
-/*
- * Perform dequantization and inverse DCT on one block of coefficients,
- * producing a 1x2 output block.
- *
- * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
- */
-
-GLOBAL(void)
-jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
-{
-  DCTELEM tmp0, tmp1;
-  ISLOW_MULT_TYPE * quantptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
-  ISHIFT_TEMPS
-
-  /* Process 1 column from input, store into output array. */
-
-  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
-
-  /* Even part */
-
-  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
-  /* Add range center and fudge factor for final descale and range-limit. */
-  tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
-
-  /* Odd part */
-
-  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
-
-  /* Final output stage */
-
-  output_buf[0][output_col] =
-    range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
-  output_buf[1][output_col] =
-    range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
-}
-
-#endif /* IDCT_SCALING_SUPPORTED */
-#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jinclude.h b/cmake/externals/libjpeg/jinclude.h
deleted file mode 100644
index 12ea8cd2f..000000000
--- a/cmake/externals/libjpeg/jinclude.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * jinclude.h
- *
- * Copyright (C) 1991-1994, Thomas G. Lane.
- * Modified 2017-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file exists to provide a single place to fix any problems with
- * including the wrong system include files.  (Common problems are taken
- * care of by the standard jconfig symbols, but on really weird systems
- * you may have to edit this file.)
- *
- * NOTE: this file is NOT intended to be included by applications using
- * the JPEG library.  Most applications need only include jpeglib.h.
- */
-
-
-/* Include auto-config file to find out which system include files we need. */
-
-#include "jconfig.h"		/* auto configuration options */
-#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
-
-/*
- * We need the NULL macro and size_t typedef.
- * On an ANSI-conforming system it is sufficient to include <stddef.h>.
- * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
- * pull in <sys/types.h> as well.
- * Note that the core JPEG library does not require <stdio.h>;
- * only the default error handler and data source/destination modules do.
- * But we must pull it in because of the references to FILE in jpeglib.h.
- * You can remove those references if you want to compile without <stdio.h>.
- */
-
-#ifdef HAVE_STDDEF_H
-#include <stddef.h>
-#endif
-
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#ifdef NEED_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#include <stdio.h>
-
-/*
- * We need memory copying and zeroing functions, plus strncpy().
- * ANSI and System V implementations declare these in <string.h>.
- * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
- * Some systems may declare memset and memcpy in <memory.h>.
- *
- * NOTE: we assume the size parameters to these functions are of type size_t.
- * Change the casts in these macros if not!
- */
-
-#ifdef NEED_BSD_STRINGS
-
-#include <strings.h>
-#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
-#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
-
-#else /* not BSD, assume ANSI/SysV string lib */
-
-#include <string.h>
-#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
-#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
-
-#endif
-
-/*
- * In ANSI C, and indeed any rational implementation, size_t is also the
- * type returned by sizeof().  However, it seems there are some irrational
- * implementations out there, in which sizeof() returns an int even though
- * size_t is defined as long or unsigned long.  To ensure consistent results
- * we always use this SIZEOF() macro in place of using sizeof() directly.
- */
-
-#define SIZEOF(object)	((size_t) sizeof(object))
-
-/*
- * The modules that use fread() and fwrite() always invoke them through
- * these macros.  On some systems you may need to twiddle the argument casts.
- * CAUTION: argument order is different from underlying functions!
- *
- * Furthermore, macros are provided for fflush() and ferror() in order
- * to facilitate adaption by applications using an own FILE class.
- *
- * You can define your own custom file I/O functions in jconfig.h and
- * #define JPEG_HAVE_FILE_IO_CUSTOM there to prevent redefinition here.
- *
- * You can #define JPEG_USE_FILE_IO_CUSTOM in jconfig.h to use custom file
- * I/O functions implemented in Delphi VCL (Visual Component Library)
- * in Vcl.Imaging.jpeg.pas for the TJPEGImage component utilizing
- * the Delphi RTL (Run-Time Library) TMemoryStream component:
- *
- *   procedure jpeg_stdio_src(var cinfo: jpeg_decompress_struct;
- *     input_file: TStream); external;
- *
- *   procedure jpeg_stdio_dest(var cinfo: jpeg_compress_struct;
- *     output_file: TStream); external;
- *
- *   function jfread(var buf; recsize, reccount: Integer; S: TStream): Integer;
- *   begin
- *     Result := S.Read(buf, recsize * reccount);
- *   end;
- *
- *   function jfwrite(const buf; recsize, reccount: Integer; S: TStream): Integer;
- *   begin
- *     Result := S.Write(buf, recsize * reccount);
- *   end;
- *
- *   function jfflush(S: TStream): Integer;
- *   begin
- *     Result := 0;
- *   end;
- *
- *   function jferror(S: TStream): Integer;
- *   begin
- *     Result := 0;
- *   end;
- *
- * TMemoryStream of Delphi RTL has the distinctive feature to provide dynamic
- * memory buffer management with a file/stream-based interface, particularly for
- * the write (output) operation, which is easier to apply compared with direct
- * implementations as given in jdatadst.c for memory destination.  Those direct
- * implementations of dynamic memory write tend to be more difficult to use,
- * so providing an option like TMemoryStream may be a useful alternative.
- *
- * The CFile/CMemFile classes of the Microsoft Foundation Class (MFC) Library
- * may be used in a similar fashion.
- */
-
-#ifndef JPEG_HAVE_FILE_IO_CUSTOM
-#ifdef JPEG_USE_FILE_IO_CUSTOM
-extern size_t jfread(void * __ptr, size_t __size, size_t __n, FILE * __stream);
-extern size_t jfwrite(const void * __ptr, size_t __size, size_t __n, FILE * __stream);
-extern int    jfflush(FILE * __stream);
-extern int    jferror(FILE * __fp);
-
-#define JFREAD(file,buf,sizeofbuf)  \
-  ((size_t) jfread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFWRITE(file,buf,sizeofbuf)  \
-  ((size_t) jfwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFFLUSH(file)	jfflush(file)
-#define JFERROR(file)	jferror(file)
-#else
-#define JFREAD(file,buf,sizeofbuf)  \
-  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFWRITE(file,buf,sizeofbuf)  \
-  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
-#define JFFLUSH(file)	fflush(file)
-#define JFERROR(file)	ferror(file)
-#endif
-#endif
diff --git a/cmake/externals/libjpeg/jmemansi.c b/cmake/externals/libjpeg/jmemansi.c
deleted file mode 100644
index 2d93e4962..000000000
--- a/cmake/externals/libjpeg/jmemansi.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * jmemansi.c
- *
- * Copyright (C) 1992-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides a simple generic implementation of the system-
- * dependent portion of the JPEG memory manager.  This implementation
- * assumes that you have the ANSI-standard library routine tmpfile().
- * Also, the problem of determining the amount of memory available
- * is shoved onto the user.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-#ifndef SEEK_SET		/* pre-ANSI systems may not define this; */
-#define SEEK_SET  0		/* if not, assume 0 is correct */
-#endif
-
-
-/*
- * Memory allocation and freeing are controlled by the regular library
- * routines malloc() and free().
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * "Large" objects are treated the same as "small" ones.
- * NB: although we include FAR keywords in the routine declarations,
- * this file won't actually work in 80x86 small/medium model; at least,
- * you probably won't be able to process useful-size images in only 64KB.
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- * It's impossible to do this in a portable way; our current solution is
- * to make the user tell us (with a default value set at compile time).
- * If you can actually get the available space, it's a good idea to subtract
- * a slop factor of 5% or so.
- */
-
-#ifndef DEFAULT_MAX_MEM		/* so can override from makefile */
-#define DEFAULT_MAX_MEM		1000000L /* default: one megabyte */
-#endif
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  return cinfo->mem->max_memory_to_use - already_allocated;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Backing store objects are only used when the value returned by
- * jpeg_mem_available is less than the total space needed.  You can dispense
- * with these routines if you have plenty of virtual memory; see jmemnobs.c.
- */
-
-
-METHODDEF(void)
-read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		    void FAR * buffer_address,
-		    long file_offset, long byte_count)
-{
-  if (fseek(info->temp_file, file_offset, SEEK_SET))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  if (JFREAD(info->temp_file, buffer_address, byte_count)
-      != (size_t) byte_count)
-    ERREXIT(cinfo, JERR_TFILE_READ);
-}
-
-
-METHODDEF(void)
-write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		     void FAR * buffer_address,
-		     long file_offset, long byte_count)
-{
-  if (fseek(info->temp_file, file_offset, SEEK_SET))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  if (JFWRITE(info->temp_file, buffer_address, byte_count)
-      != (size_t) byte_count)
-    ERREXIT(cinfo, JERR_TFILE_WRITE);
-}
-
-
-METHODDEF(void)
-close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
-{
-  fclose(info->temp_file);
-  /* Since this implementation uses tmpfile() to create the file,
-   * no explicit file deletion is needed.
-   */
-}
-
-
-/*
- * Initial opening of a backing-store object.
- *
- * This version uses tmpfile(), which constructs a suitable file name
- * behind the scenes.  We don't have to use info->temp_name[] at all;
- * indeed, we can't even find out the actual name of the temp file.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  if ((info->temp_file = tmpfile()) == NULL)
-    ERREXITS(cinfo, JERR_TFILE_CREATE, "");
-  info->read_backing_store = read_backing_store;
-  info->write_backing_store = write_backing_store;
-  info->close_backing_store = close_backing_store;
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  return DEFAULT_MAX_MEM;	/* default for max_memory_to_use */
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* no work */
-}
diff --git a/cmake/externals/libjpeg/jmemmgr.c b/cmake/externals/libjpeg/jmemmgr.c
deleted file mode 100644
index 40377de47..000000000
--- a/cmake/externals/libjpeg/jmemmgr.c
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- * jmemmgr.c
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011-2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains the JPEG system-independent memory management
- * routines.  This code is usable across a wide variety of machines; most
- * of the system dependencies have been isolated in a separate file.
- * The major functions provided here are:
- *   * pool-based allocation and freeing of memory;
- *   * policy decisions about how to divide available memory among the
- *     virtual arrays;
- *   * control logic for swapping virtual arrays between main memory and
- *     backing storage.
- * The separate system-dependent file provides the actual backing-storage
- * access code, and it contains the policy decision about how much total
- * main memory to use.
- * This file is system-dependent in the sense that some of its functions
- * are unnecessary in some systems.  For example, if there is enough virtual
- * memory so that backing storage will never be used, much of the virtual
- * array control logic could be removed.  (Of course, if you have that much
- * memory then you shouldn't care about a little bit of unused code...)
- */
-
-#define JPEG_INTERNALS
-#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef NO_GETENV
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
-extern char * getenv JPP((const char * name));
-#endif
-#endif
-
-
-/*
- * Some important notes:
- *   The allocation routines provided here must never return NULL.
- *   They should exit to error_exit if unsuccessful.
- *
- *   It's not a good idea to try to merge the sarray and barray routines,
- *   even though they are textually almost the same, because samples are
- *   usually stored as bytes while coefficients are shorts or ints.  Thus,
- *   in machines where byte pointers have a different representation from
- *   word pointers, the resulting machine code could not be the same.
- */
-
-
-/*
- * Many machines require storage alignment: longs must start on 4-byte
- * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
- * always returns pointers that are multiples of the worst-case alignment
- * requirement, and we had better do so too.
- * There isn't any really portable way to determine the worst-case alignment
- * requirement.  This module assumes that the alignment requirement is
- * multiples of sizeof(ALIGN_TYPE).
- * By default, we define ALIGN_TYPE as double.  This is necessary on some
- * workstations (where doubles really do need 8-byte alignment) and will work
- * fine on nearly everything.  If your machine has lesser alignment needs,
- * you can save a few bytes by making ALIGN_TYPE smaller.
- * The only place I know of where this will NOT work is certain Macintosh
- * 680x0 compilers that define double as a 10-byte IEEE extended float.
- * Doing 10-byte alignment is counterproductive because longwords won't be
- * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
- * such a compiler.
- */
-
-#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
-#define ALIGN_TYPE  double
-#endif
-
-
-/*
- * We allocate objects from "pools", where each pool is gotten with a single
- * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
- * overhead within a pool, except for alignment padding.  Each pool has a
- * header with a link to the next pool of the same class.
- * Small and large pool headers are identical except that the latter's
- * link pointer must be FAR on 80x86 machines.
- * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
- * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
- * of the alignment requirement of ALIGN_TYPE.
- */
-
-typedef union small_pool_struct * small_pool_ptr;
-
-typedef union small_pool_struct {
-  struct {
-    small_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
-} small_pool_hdr;
-
-typedef union large_pool_struct FAR * large_pool_ptr;
-
-typedef union large_pool_struct {
-  struct {
-    large_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
-} large_pool_hdr;
-
-
-/*
- * Here is the full definition of a memory manager object.
- */
-
-typedef struct {
-  struct jpeg_memory_mgr pub;	/* public fields */
-
-  /* Each pool identifier (lifetime class) names a linked list of pools. */
-  small_pool_ptr small_list[JPOOL_NUMPOOLS];
-  large_pool_ptr large_list[JPOOL_NUMPOOLS];
-
-  /* Since we only have one lifetime class of virtual arrays, only one
-   * linked list is necessary (for each datatype).  Note that the virtual
-   * array control blocks being linked together are actually stored somewhere
-   * in the small-pool list.
-   */
-  jvirt_sarray_ptr virt_sarray_list;
-  jvirt_barray_ptr virt_barray_list;
-
-  /* This counts total space obtained from jpeg_get_small/large */
-  size_t total_space_allocated;
-
-  /* alloc_sarray and alloc_barray set this value for use by virtual
-   * array routines.
-   */
-  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
-} my_memory_mgr;
-
-typedef my_memory_mgr * my_mem_ptr;
-
-
-/*
- * The control blocks for virtual arrays.
- * Note that these blocks are allocated in the "small" pool area.
- * System-dependent info for the associated backing store (if any) is hidden
- * inside the backing_store_info struct.
- */
-
-struct jvirt_sarray_control {
-  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
-};
-
-struct jvirt_barray_control {
-  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_barray_ptr next;	/* link to next virtual barray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
-};
-
-
-#ifdef MEM_STATS		/* optional extra stuff for statistics */
-
-LOCAL(void)
-print_mem_stats (j_common_ptr cinfo, int pool_id)
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr shdr_ptr;
-  large_pool_ptr lhdr_ptr;
-
-  /* Since this is only a debugging stub, we can cheat a little by using
-   * fprintf directly rather than going through the trace message code.
-   * This is helpful because message parm array can't handle longs.
-   */
-  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
-	  pool_id, (long) mem->total_space_allocated);
-
-  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
-       lhdr_ptr = lhdr_ptr->hdr.next) {
-    fprintf(stderr, "  Large chunk used %ld\n",
-	    (long) lhdr_ptr->hdr.bytes_used);
-  }
-
-  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
-       shdr_ptr = shdr_ptr->hdr.next) {
-    fprintf(stderr, "  Small chunk used %ld free %ld\n",
-	    (long) shdr_ptr->hdr.bytes_used,
-	    (long) shdr_ptr->hdr.bytes_left);
-  }
-}
-
-#endif /* MEM_STATS */
-
-
-LOCAL(noreturn_t)
-out_of_memory (j_common_ptr cinfo, int which)
-/* Report an out-of-memory error and stop execution */
-/* If we compiled MEM_STATS support, report alloc requests before dying */
-{
-#ifdef MEM_STATS
-  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
-#endif
-  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
-}
-
-
-/*
- * Allocation of "small" objects.
- *
- * For these, we use pooled storage.  When a new pool must be created,
- * we try to get enough space for the current request plus a "slop" factor,
- * where the slop will be the amount of leftover space in the new pool.
- * The speed vs. space tradeoff is largely determined by the slop values.
- * A different slop value is provided for each pool class (lifetime),
- * and we also distinguish the first pool of a class from later ones.
- * NOTE: the values given work fairly well on both 16- and 32-bit-int
- * machines, but may be too small if longs are 64 bits or more.
- */
-
-static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
-{
-	1600,			/* first PERMANENT pool */
-	16000			/* first IMAGE pool */
-};
-
-static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
-{
-	0,			/* additional PERMANENT pools */
-	5000			/* additional IMAGE pools */
-};
-
-#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
-
-
-METHODDEF(void *)
-alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
-/* Allocate a "small" object */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr hdr_ptr, prev_hdr_ptr;
-  size_t odd_bytes, min_request, slop;
-  char * data_ptr;
-
-  /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) MAX_ALLOC_CHUNK - SIZEOF(small_pool_hdr))
-    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
-
-  /* See if space is available in any existing pool */
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-  prev_hdr_ptr = NULL;
-  hdr_ptr = mem->small_list[pool_id];
-  while (hdr_ptr != NULL) {
-    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
-      break;			/* found pool with enough space */
-    prev_hdr_ptr = hdr_ptr;
-    hdr_ptr = hdr_ptr->hdr.next;
-  }
-
-  /* Time to make a new pool? */
-  if (hdr_ptr == NULL) {
-    /* min_request is what we need now, slop is what will be leftover */
-    min_request = sizeofobject + SIZEOF(small_pool_hdr);
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
-      slop = first_pool_slop[pool_id];
-    else
-      slop = extra_pool_slop[pool_id];
-    /* Don't ask for more than MAX_ALLOC_CHUNK */
-    if (slop > (size_t) MAX_ALLOC_CHUNK - min_request)
-      slop = (size_t) MAX_ALLOC_CHUNK - min_request;
-    /* Try to get space, if fail reduce slop and try again */
-    for (;;) {
-      hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
-      if (hdr_ptr != NULL)
-	break;
-      slop /= 2;
-      if (slop < MIN_SLOP)	/* give up when it gets real small */
-	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
-    }
-    mem->total_space_allocated += min_request + slop;
-    /* Success, initialize the new pool header and add to end of list */
-    hdr_ptr->hdr.next = NULL;
-    hdr_ptr->hdr.bytes_used = 0;
-    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
-      mem->small_list[pool_id] = hdr_ptr;
-    else
-      prev_hdr_ptr->hdr.next = hdr_ptr;
-  }
-
-  /* OK, allocate the object from the current pool */
-  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
-  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
-  hdr_ptr->hdr.bytes_used += sizeofobject;
-  hdr_ptr->hdr.bytes_left -= sizeofobject;
-
-  return (void *) data_ptr;
-}
-
-
-/*
- * Allocation of "large" objects.
- *
- * The external semantics of these are the same as "small" objects,
- * except that FAR pointers are used on 80x86.  However the pool
- * management heuristics are quite different.  We assume that each
- * request is large enough that it may as well be passed directly to
- * jpeg_get_large; the pool management just links everything together
- * so that we can free it all on demand.
- * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
- * structures.  The routines that create these structures (see below)
- * deliberately bunch rows together to ensure a large request size.
- */
-
-METHODDEF(void FAR *)
-alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
-/* Allocate a "large" object */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  large_pool_ptr hdr_ptr;
-  size_t odd_bytes;
-
-  /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr))
-    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
-
-  /* Always make a new pool */
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
-					    SIZEOF(large_pool_hdr));
-  if (hdr_ptr == NULL)
-    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
-  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
-
-  /* Success, initialize the new pool header and add to list */
-  hdr_ptr->hdr.next = mem->large_list[pool_id];
-  /* We maintain space counts in each pool header for statistical purposes,
-   * even though they are not needed for allocation.
-   */
-  hdr_ptr->hdr.bytes_used = sizeofobject;
-  hdr_ptr->hdr.bytes_left = 0;
-  mem->large_list[pool_id] = hdr_ptr;
-
-  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
-}
-
-
-/*
- * Creation of 2-D sample arrays.
- * The pointers are in near heap, the samples themselves in FAR heap.
- *
- * To minimize allocation overhead and to allow I/O of large contiguous
- * blocks, we allocate the sample rows in groups of as many rows as possible
- * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
- * NB: the virtual array control routines, later in this file, know about
- * this chunking of rows.  The rowsperchunk value is left in the mem manager
- * object so that it can be saved away if this sarray is the workspace for
- * a virtual array.
- */
-
-METHODDEF(JSAMPARRAY)
-alloc_sarray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION samplesperrow, JDIMENSION numrows)
-/* Allocate a 2-D sample array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  JSAMPARRAY result;
-  JSAMPROW workspace;
-  JDIMENSION rowsperchunk, currow, i;
-  long ltemp;
-
-  /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr)) /
-	  ((long) samplesperrow * SIZEOF(JSAMPLE));
-  if (ltemp <= 0)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-  if (ltemp < (long) numrows)
-    rowsperchunk = (JDIMENSION) ltemp;
-  else
-    rowsperchunk = numrows;
-  mem->last_rowsperchunk = rowsperchunk;
-
-  /* Get space for row pointers (small object) */
-  result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
-				    (size_t) numrows * SIZEOF(JSAMPROW));
-
-  /* Get the rows themselves (large objects) */
-  currow = 0;
-  while (currow < numrows) {
-    rowsperchunk = MIN(rowsperchunk, numrows - currow);
-    workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
-      (size_t) rowsperchunk * (size_t) samplesperrow * SIZEOF(JSAMPLE));
-    for (i = rowsperchunk; i > 0; i--) {
-      result[currow++] = workspace;
-      workspace += samplesperrow;
-    }
-  }
-
-  return result;
-}
-
-
-/*
- * Creation of 2-D coefficient-block arrays.
- * This is essentially the same as the code for sample arrays, above.
- */
-
-METHODDEF(JBLOCKARRAY)
-alloc_barray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION blocksperrow, JDIMENSION numrows)
-/* Allocate a 2-D coefficient-block array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  JBLOCKARRAY result;
-  JBLOCKROW workspace;
-  JDIMENSION rowsperchunk, currow, i;
-  long ltemp;
-
-  /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK - SIZEOF(large_pool_hdr)) /
-	  ((long) blocksperrow * SIZEOF(JBLOCK));
-  if (ltemp <= 0)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-  if (ltemp < (long) numrows)
-    rowsperchunk = (JDIMENSION) ltemp;
-  else
-    rowsperchunk = numrows;
-  mem->last_rowsperchunk = rowsperchunk;
-
-  /* Get space for row pointers (small object) */
-  result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
-				     (size_t) numrows * SIZEOF(JBLOCKROW));
-
-  /* Get the rows themselves (large objects) */
-  currow = 0;
-  while (currow < numrows) {
-    rowsperchunk = MIN(rowsperchunk, numrows - currow);
-    workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
-      (size_t) rowsperchunk * (size_t) blocksperrow * SIZEOF(JBLOCK));
-    for (i = rowsperchunk; i > 0; i--) {
-      result[currow++] = workspace;
-      workspace += blocksperrow;
-    }
-  }
-
-  return result;
-}
-
-
-/*
- * About virtual array management:
- *
- * The above "normal" array routines are only used to allocate strip buffers
- * (as wide as the image, but just a few rows high).  Full-image-sized buffers
- * are handled as "virtual" arrays.  The array is still accessed a strip at a
- * time, but the memory manager must save the whole array for repeated
- * accesses.  The intended implementation is that there is a strip buffer in
- * memory (as high as is possible given the desired memory limit), plus a
- * backing file that holds the rest of the array.
- *
- * The request_virt_array routines are told the total size of the image and
- * the maximum number of rows that will be accessed at once.  The in-memory
- * buffer must be at least as large as the maxaccess value.
- *
- * The request routines create control blocks but not the in-memory buffers.
- * That is postponed until realize_virt_arrays is called.  At that time the
- * total amount of space needed is known (approximately, anyway), so free
- * memory can be divided up fairly.
- *
- * The access_virt_array routines are responsible for making a specific strip
- * area accessible (after reading or writing the backing file, if necessary).
- * Note that the access routines are told whether the caller intends to modify
- * the accessed strip; during a read-only pass this saves having to rewrite
- * data to disk.  The access routines are also responsible for pre-zeroing
- * any newly accessed rows, if pre-zeroing was requested.
- *
- * In current usage, the access requests are usually for nonoverlapping
- * strips; that is, successive access start_row numbers differ by exactly
- * num_rows = maxaccess.  This means we can get good performance with simple
- * buffer dump/reload logic, by making the in-memory buffer be a multiple
- * of the access height; then there will never be accesses across bufferload
- * boundaries.  The code will still work with overlapping access requests,
- * but it doesn't handle bufferload overlaps very efficiently.
- */
-
-
-METHODDEF(jvirt_sarray_ptr)
-request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION samplesperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
-/* Request a virtual 2-D sample array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  jvirt_sarray_ptr result;
-
-  /* Only IMAGE-lifetime virtual arrays are currently supported */
-  if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  /* get control block */
-  result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_sarray_control));
-
-  result->mem_buffer = NULL;	/* marks array not yet realized */
-  result->rows_in_array = numrows;
-  result->samplesperrow = samplesperrow;
-  result->maxaccess = maxaccess;
-  result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
-  result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
-  mem->virt_sarray_list = result;
-
-  return result;
-}
-
-
-METHODDEF(jvirt_barray_ptr)
-request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION blocksperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
-/* Request a virtual 2-D coefficient-block array */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  jvirt_barray_ptr result;
-
-  /* Only IMAGE-lifetime virtual arrays are currently supported */
-  if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-  /* get control block */
-  result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_barray_control));
-
-  result->mem_buffer = NULL;	/* marks array not yet realized */
-  result->rows_in_array = numrows;
-  result->blocksperrow = blocksperrow;
-  result->maxaccess = maxaccess;
-  result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
-  result->next = mem->virt_barray_list; /* add to list of virtual arrays */
-  mem->virt_barray_list = result;
-
-  return result;
-}
-
-
-METHODDEF(void)
-realize_virt_arrays (j_common_ptr cinfo)
-/* Allocate the in-memory buffers for any unrealized virtual arrays */
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  long bytesperrow, space_per_minheight, maximum_space;
-  long avail_mem, minheights, max_minheights;
-  jvirt_sarray_ptr sptr;
-  jvirt_barray_ptr bptr;
-
-  /* Compute the minimum space needed (maxaccess rows in each buffer)
-   * and the maximum space needed (full image height in each buffer).
-   * These may be of use to the system-dependent jpeg_mem_available routine.
-   */
-  space_per_minheight = 0;
-  maximum_space = 0;
-  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-    if (sptr->mem_buffer == NULL) { /* if not realized yet */
-      bytesperrow = (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
-      space_per_minheight += (long) sptr->maxaccess * bytesperrow;
-      maximum_space += (long) sptr->rows_in_array * bytesperrow;
-    }
-  }
-  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-    if (bptr->mem_buffer == NULL) { /* if not realized yet */
-      bytesperrow = (long) bptr->blocksperrow * SIZEOF(JBLOCK);
-      space_per_minheight += (long) bptr->maxaccess * bytesperrow;
-      maximum_space += (long) bptr->rows_in_array * bytesperrow;
-    }
-  }
-
-  if (space_per_minheight <= 0)
-    return;			/* no unrealized arrays, no work */
-
-  /* Determine amount of memory to actually use; this is system-dependent. */
-  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
-				 (long) mem->total_space_allocated);
-
-  /* If the maximum space needed is available, make all the buffers full
-   * height; otherwise parcel it out with the same number of minheights
-   * in each buffer.
-   */
-  if (avail_mem >= maximum_space)
-    max_minheights = 1000000000L;
-  else {
-    max_minheights = avail_mem / space_per_minheight;
-    /* If there doesn't seem to be enough space, try to get the minimum
-     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
-     */
-    if (max_minheights <= 0)
-      max_minheights = 1;
-  }
-
-  /* Allocate the in-memory buffers and initialize backing store as needed. */
-
-  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-    if (sptr->mem_buffer == NULL) { /* if not realized yet */
-      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
-      if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	sptr->rows_in_mem = sptr->rows_in_array;
-      } else {
-	/* It doesn't fit in memory, create backing store. */
-	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
-				(long) sptr->rows_in_array *
-				(long) sptr->samplesperrow *
-				(long) SIZEOF(JSAMPLE));
-	sptr->b_s_open = TRUE;
-      }
-      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
-				      sptr->samplesperrow, sptr->rows_in_mem);
-      sptr->rowsperchunk = mem->last_rowsperchunk;
-      sptr->cur_start_row = 0;
-      sptr->first_undef_row = 0;
-      sptr->dirty = FALSE;
-    }
-  }
-
-  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-    if (bptr->mem_buffer == NULL) { /* if not realized yet */
-      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
-      if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	bptr->rows_in_mem = bptr->rows_in_array;
-      } else {
-	/* It doesn't fit in memory, create backing store. */
-	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
-				(long) bptr->rows_in_array *
-				(long) bptr->blocksperrow *
-				(long) SIZEOF(JBLOCK));
-	bptr->b_s_open = TRUE;
-      }
-      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
-				      bptr->blocksperrow, bptr->rows_in_mem);
-      bptr->rowsperchunk = mem->last_rowsperchunk;
-      bptr->cur_start_row = 0;
-      bptr->first_undef_row = 0;
-      bptr->dirty = FALSE;
-    }
-  }
-}
-
-
-LOCAL(void)
-do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
-/* Do backing store read or write of a virtual sample array */
-{
-  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
-
-  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
-  file_offset = (long) ptr->cur_start_row * bytesperrow;
-  /* Loop to read or write each allocation chunk in mem_buffer */
-  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
-    /* One chunk, but check for short chunk at end of buffer */
-    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
-    /* Transfer no more than is currently defined */
-    thisrow = (long) ptr->cur_start_row + i;
-    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
-    /* Transfer no more than fits in file */
-    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
-      break;
-    byte_count = rows * bytesperrow;
-    if (writing)
-      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
-    else
-      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
-    file_offset += byte_count;
-  }
-}
-
-
-LOCAL(void)
-do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
-/* Do backing store read or write of a virtual coefficient-block array */
-{
-  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
-
-  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
-  file_offset = (long) ptr->cur_start_row * bytesperrow;
-  /* Loop to read or write each allocation chunk in mem_buffer */
-  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
-    /* One chunk, but check for short chunk at end of buffer */
-    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
-    /* Transfer no more than is currently defined */
-    thisrow = (long) ptr->cur_start_row + i;
-    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
-    /* Transfer no more than fits in file */
-    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
-      break;
-    byte_count = rows * bytesperrow;
-    if (writing)
-      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
-    else
-      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
-    file_offset += byte_count;
-  }
-}
-
-
-METHODDEF(JSAMPARRAY)
-access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
-/* Access the part of a virtual sample array starting at start_row */
-/* and extending for num_rows rows.  writable is true if  */
-/* caller intends to modify the accessed area. */
-{
-  JDIMENSION end_row = start_row + num_rows;
-  JDIMENSION undef_row;
-
-  /* debugging check */
-  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
-      ptr->mem_buffer == NULL)
-    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-
-  /* Make the desired part of the virtual array accessible */
-  if (start_row < ptr->cur_start_row ||
-      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
-    if (! ptr->b_s_open)
-      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
-    /* Flush old buffer contents if necessary */
-    if (ptr->dirty) {
-      do_sarray_io(cinfo, ptr, TRUE);
-      ptr->dirty = FALSE;
-    }
-    /* Decide what part of virtual array to access.
-     * Algorithm: if target address > current window, assume forward scan,
-     * load starting at target address.  If target address < current window,
-     * assume backward scan, load so that target area is top of window.
-     * Note that when switching from forward write to forward read, will have
-     * start_row = 0, so the limiting case applies and we load from 0 anyway.
-     */
-    if (start_row > ptr->cur_start_row) {
-      ptr->cur_start_row = start_row;
-    } else {
-      /* use long arithmetic here to avoid overflow & unsigned problems */
-      long ltemp;
-
-      ltemp = (long) end_row - (long) ptr->rows_in_mem;
-      if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
-      ptr->cur_start_row = (JDIMENSION) ltemp;
-    }
-    /* Read in the selected part of the array.
-     * During the initial write pass, we will do no actual read
-     * because the selected part is all undefined.
-     */
-    do_sarray_io(cinfo, ptr, FALSE);
-  }
-  /* Ensure the accessed part of the array is defined; prezero if needed.
-   * To improve locality of access, we only prezero the part of the array
-   * that the caller is about to access, not the entire in-memory array.
-   */
-  if (ptr->first_undef_row < end_row) {
-    if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
-    } else {
-      undef_row = ptr->first_undef_row;
-    }
-    if (writable)
-      ptr->first_undef_row = end_row;
-    if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
-      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
-      end_row -= ptr->cur_start_row;
-      while (undef_row < end_row) {
-	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
-      }
-    } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-    }
-  }
-  /* Flag the buffer dirty if caller will write in it */
-  if (writable)
-    ptr->dirty = TRUE;
-  /* Return address of proper part of the buffer */
-  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
-}
-
-
-METHODDEF(JBLOCKARRAY)
-access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
-/* Access the part of a virtual block array starting at start_row */
-/* and extending for num_rows rows.  writable is true if  */
-/* caller intends to modify the accessed area. */
-{
-  JDIMENSION end_row = start_row + num_rows;
-  JDIMENSION undef_row;
-
-  /* debugging check */
-  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
-      ptr->mem_buffer == NULL)
-    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-
-  /* Make the desired part of the virtual array accessible */
-  if (start_row < ptr->cur_start_row ||
-      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
-    if (! ptr->b_s_open)
-      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
-    /* Flush old buffer contents if necessary */
-    if (ptr->dirty) {
-      do_barray_io(cinfo, ptr, TRUE);
-      ptr->dirty = FALSE;
-    }
-    /* Decide what part of virtual array to access.
-     * Algorithm: if target address > current window, assume forward scan,
-     * load starting at target address.  If target address < current window,
-     * assume backward scan, load so that target area is top of window.
-     * Note that when switching from forward write to forward read, will have
-     * start_row = 0, so the limiting case applies and we load from 0 anyway.
-     */
-    if (start_row > ptr->cur_start_row) {
-      ptr->cur_start_row = start_row;
-    } else {
-      /* use long arithmetic here to avoid overflow & unsigned problems */
-      long ltemp;
-
-      ltemp = (long) end_row - (long) ptr->rows_in_mem;
-      if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
-      ptr->cur_start_row = (JDIMENSION) ltemp;
-    }
-    /* Read in the selected part of the array.
-     * During the initial write pass, we will do no actual read
-     * because the selected part is all undefined.
-     */
-    do_barray_io(cinfo, ptr, FALSE);
-  }
-  /* Ensure the accessed part of the array is defined; prezero if needed.
-   * To improve locality of access, we only prezero the part of the array
-   * that the caller is about to access, not the entire in-memory array.
-   */
-  if (ptr->first_undef_row < end_row) {
-    if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
-    } else {
-      undef_row = ptr->first_undef_row;
-    }
-    if (writable)
-      ptr->first_undef_row = end_row;
-    if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
-      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
-      end_row -= ptr->cur_start_row;
-      while (undef_row < end_row) {
-	FMEMZERO((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
-      }
-    } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-    }
-  }
-  /* Flag the buffer dirty if caller will write in it */
-  if (writable)
-    ptr->dirty = TRUE;
-  /* Return address of proper part of the buffer */
-  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
-}
-
-
-/*
- * Release all objects belonging to a specified pool.
- */
-
-METHODDEF(void)
-free_pool (j_common_ptr cinfo, int pool_id)
-{
-  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  small_pool_ptr shdr_ptr;
-  large_pool_ptr lhdr_ptr;
-  size_t space_freed;
-
-  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
-
-#ifdef MEM_STATS
-  if (cinfo->err->trace_level > 1)
-    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
-#endif
-
-  /* If freeing IMAGE pool, close any virtual arrays first */
-  if (pool_id == JPOOL_IMAGE) {
-    jvirt_sarray_ptr sptr;
-    jvirt_barray_ptr bptr;
-
-    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-      if (sptr->b_s_open) {	/* there may be no backing store */
-	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
-      }
-    }
-    mem->virt_sarray_list = NULL;
-    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-      if (bptr->b_s_open) {	/* there may be no backing store */
-	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
-      }
-    }
-    mem->virt_barray_list = NULL;
-  }
-
-  /* Release large objects */
-  lhdr_ptr = mem->large_list[pool_id];
-  mem->large_list[pool_id] = NULL;
-
-  while (lhdr_ptr != NULL) {
-    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
-    space_freed = lhdr_ptr->hdr.bytes_used +
-		  lhdr_ptr->hdr.bytes_left +
-		  SIZEOF(large_pool_hdr);
-    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
-    mem->total_space_allocated -= space_freed;
-    lhdr_ptr = next_lhdr_ptr;
-  }
-
-  /* Release small objects */
-  shdr_ptr = mem->small_list[pool_id];
-  mem->small_list[pool_id] = NULL;
-
-  while (shdr_ptr != NULL) {
-    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
-    space_freed = shdr_ptr->hdr.bytes_used +
-		  shdr_ptr->hdr.bytes_left +
-		  SIZEOF(small_pool_hdr);
-    jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
-    mem->total_space_allocated -= space_freed;
-    shdr_ptr = next_shdr_ptr;
-  }
-}
-
-
-/*
- * Close up shop entirely.
- * Note that this cannot be called unless cinfo->mem is non-NULL.
- */
-
-METHODDEF(void)
-self_destruct (j_common_ptr cinfo)
-{
-  int pool;
-
-  /* Close all backing store, release all memory.
-   * Releasing pools in reverse order might help avoid fragmentation
-   * with some (brain-damaged) malloc libraries.
-   */
-  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
-    free_pool(cinfo, pool);
-  }
-
-  /* Release the memory manager control block too. */
-  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
-  cinfo->mem = NULL;		/* ensures I will be called only once */
-
-  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
-}
-
-
-/*
- * Memory manager initialization.
- * When this is called, only the error manager pointer is valid in cinfo!
- */
-
-GLOBAL(void)
-jinit_memory_mgr (j_common_ptr cinfo)
-{
-  my_mem_ptr mem;
-  long max_to_use;
-  int pool;
-  size_t test_mac;
-
-  cinfo->mem = NULL;		/* for safety if init fails */
-
-  /* Check for configuration errors.
-   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
-   * doesn't reflect any real hardware alignment requirement.
-   * The test is a little tricky: for X>0, X and X-1 have no one-bits
-   * in common if and only if X is a power of 2, ie has only one one-bit.
-   * Some compilers may give an "unreachable code" warning here; ignore it.
-   */
-  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
-    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
-  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
-   * a multiple of SIZEOF(ALIGN_TYPE).
-   * Again, an "unreachable code" warning may be ignored here.
-   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
-   */
-  test_mac = (size_t) MAX_ALLOC_CHUNK;
-  if ((long) test_mac != MAX_ALLOC_CHUNK ||
-      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
-    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
-
-  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
-
-  /* Attempt to allocate memory manager's control block */
-  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
-
-  if (mem == NULL) {
-    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
-    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
-  }
-
-  /* OK, fill in the method pointers */
-  mem->pub.alloc_small = alloc_small;
-  mem->pub.alloc_large = alloc_large;
-  mem->pub.alloc_sarray = alloc_sarray;
-  mem->pub.alloc_barray = alloc_barray;
-  mem->pub.request_virt_sarray = request_virt_sarray;
-  mem->pub.request_virt_barray = request_virt_barray;
-  mem->pub.realize_virt_arrays = realize_virt_arrays;
-  mem->pub.access_virt_sarray = access_virt_sarray;
-  mem->pub.access_virt_barray = access_virt_barray;
-  mem->pub.free_pool = free_pool;
-  mem->pub.self_destruct = self_destruct;
-
-  /* Make MAX_ALLOC_CHUNK accessible to other modules */
-  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
-
-  /* Initialize working state */
-  mem->pub.max_memory_to_use = max_to_use;
-
-  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
-    mem->small_list[pool] = NULL;
-    mem->large_list[pool] = NULL;
-  }
-  mem->virt_sarray_list = NULL;
-  mem->virt_barray_list = NULL;
-
-  mem->total_space_allocated = SIZEOF(my_memory_mgr);
-
-  /* Declare ourselves open for business */
-  cinfo->mem = &mem->pub;
-
-  /* Check for an environment variable JPEGMEM; if found, override the
-   * default max_memory setting from jpeg_mem_init.  Note that the
-   * surrounding application may again override this value.
-   * If your system doesn't support getenv(), define NO_GETENV to disable
-   * this feature.
-   */
-#ifndef NO_GETENV
-  { char * memenv;
-
-    if ((memenv = getenv("JPEGMEM")) != NULL) {
-      char ch = 'x';
-
-      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
-	if (ch == 'm' || ch == 'M')
-	  max_to_use *= 1000L;
-	mem->pub.max_memory_to_use = max_to_use * 1000L;
-      }
-    }
-  }
-#endif
-
-}
diff --git a/cmake/externals/libjpeg/jmemnobs.c b/cmake/externals/libjpeg/jmemnobs.c
deleted file mode 100644
index a364cd822..000000000
--- a/cmake/externals/libjpeg/jmemnobs.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * jmemnobs.c
- *
- * Copyright (C) 1992-1996, Thomas G. Lane.
- * Modified 2019 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides a really simple implementation of the system-
- * dependent portion of the JPEG memory manager.  This implementation
- * assumes that no backing-store files are needed: all required space
- * can be obtained from malloc().
- * This is very portable in the sense that it'll compile on almost anything,
- * but you'd better have lots of main memory (or virtual memory) if you want
- * to process big images.
- * Note that the max_memory_to_use option is respected by this implementation.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-
-/*
- * Memory allocation and freeing are controlled by the regular library
- * routines malloc() and free().
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * "Large" objects are treated the same as "small" ones.
- * NB: although we include FAR keywords in the routine declarations,
- * this file won't actually work in 80x86 small/medium model; at least,
- * you probably won't be able to process useful-size images in only 64KB.
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- */
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  if (cinfo->mem->max_memory_to_use)
-    return cinfo->mem->max_memory_to_use - already_allocated;
-
-  /* Here we say, "we got all you want bud!" */
-  return max_bytes_needed;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Since jpeg_mem_available always promised the moon,
- * this should never be called and we can just error out.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  ERREXIT(cinfo, JERR_NO_BACKING_STORE);
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.  Here, there isn't any.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  return 0;			/* just set max_memory_to_use to 0 */
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* no work */
-}
diff --git a/cmake/externals/libjpeg/jmemsys.h b/cmake/externals/libjpeg/jmemsys.h
deleted file mode 100644
index 6c3c6d348..000000000
--- a/cmake/externals/libjpeg/jmemsys.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * jmemsys.h
- *
- * Copyright (C) 1992-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This include file defines the interface between the system-independent
- * and system-dependent portions of the JPEG memory manager.  No other
- * modules need include it.  (The system-independent portion is jmemmgr.c;
- * there are several different versions of the system-dependent portion.)
- *
- * This file works as-is for the system-dependent memory managers supplied
- * in the IJG distribution.  You may need to modify it if you write a
- * custom memory manager.  If system-dependent changes are needed in
- * this file, the best method is to #ifdef them based on a configuration
- * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
- * and USE_MAC_MEMMGR.
- */
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_get_small		jGetSmall
-#define jpeg_free_small		jFreeSmall
-#define jpeg_get_large		jGetLarge
-#define jpeg_free_large		jFreeLarge
-#define jpeg_mem_available	jMemAvail
-#define jpeg_open_backing_store	jOpenBackStore
-#define jpeg_mem_init		jMemInit
-#define jpeg_mem_term		jMemTerm
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * These two functions are used to allocate and release small chunks of
- * memory.  (Typically the total amount requested through jpeg_get_small is
- * no more than 20K or so; this will be requested in chunks of a few K each.)
- * Behavior should be the same as for the standard library functions malloc
- * and free; in particular, jpeg_get_small must return NULL on failure.
- * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
- * size of the object being freed, just in case it's needed.
- * On an 80x86 machine using small-data memory model, these manage near heap.
- */
-
-EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
-EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
-				  size_t sizeofobject));
-
-/*
- * These two functions are used to allocate and release large chunks of
- * memory (up to the total free space designated by jpeg_mem_available).
- * The interface is the same as above, except that on an 80x86 machine,
- * far pointers are used.  On most other machines these are identical to
- * the jpeg_get/free_small routines; but we keep them separate anyway,
- * in case a different allocation strategy is desirable for large chunks.
- */
-
-EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
-				       size_t sizeofobject));
-EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
-				  size_t sizeofobject));
-
-/*
- * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
- * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
- * matter, but that case should never come into play).  This macro is needed
- * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
- * On those machines, we expect that jconfig.h will provide a proper value.
- * On machines with 32-bit flat address spaces, any large constant may be used.
- *
- * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
- * size_t and will be a multiple of sizeof(align_type).
- */
-
-#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
-#define MAX_ALLOC_CHUNK  1000000000L
-#endif
-
-/*
- * This routine computes the total space still available for allocation by
- * jpeg_get_large.  If more space than this is needed, backing store will be
- * used.  NOTE: any memory already allocated must not be counted.
- *
- * There is a minimum space requirement, corresponding to the minimum
- * feasible buffer sizes; jmemmgr.c will request that much space even if
- * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
- * all working storage in memory, is also passed in case it is useful.
- * Finally, the total space already allocated is passed.  If no better
- * method is available, cinfo->mem->max_memory_to_use - already_allocated
- * is often a suitable calculation.
- *
- * It is OK for jpeg_mem_available to underestimate the space available
- * (that'll just lead to more backing-store access than is really necessary).
- * However, an overestimate will lead to failure.  Hence it's wise to subtract
- * a slop factor from the true available space.  5% should be enough.
- *
- * On machines with lots of virtual memory, any large constant may be returned.
- * Conversely, zero may be returned to always use the minimum amount of memory.
- */
-
-EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
-				     long min_bytes_needed,
-				     long max_bytes_needed,
-				     long already_allocated));
-
-
-/*
- * This structure holds whatever state is needed to access a single
- * backing-store object.  The read/write/close method pointers are called
- * by jmemmgr.c to manipulate the backing-store object; all other fields
- * are private to the system-dependent backing store routines.
- */
-
-#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
-
-
-#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
-
-typedef unsigned short XMSH;	/* type of extended-memory handles */
-typedef unsigned short EMSH;	/* type of expanded-memory handles */
-
-typedef union {
-  short file_handle;		/* DOS file handle if it's a temp file */
-  XMSH xms_handle;		/* handle if it's a chunk of XMS */
-  EMSH ems_handle;		/* handle if it's a chunk of EMS */
-} handle_union;
-
-#endif /* USE_MSDOS_MEMMGR */
-
-#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
-#include <Files.h>
-#endif /* USE_MAC_MEMMGR */
-
-
-typedef struct backing_store_struct * backing_store_ptr;
-
-typedef struct backing_store_struct {
-  /* Methods for reading/writing/closing this backing-store object */
-  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
-				     backing_store_ptr info,
-				     void FAR * buffer_address,
-				     long file_offset, long byte_count));
-  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info,
-				      void FAR * buffer_address,
-				      long file_offset, long byte_count));
-  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info));
-
-  /* Private fields for system-dependent backing-store management */
-#ifdef USE_MSDOS_MEMMGR
-  /* For the MS-DOS manager (jmemdos.c), we need: */
-  handle_union handle;		/* reference to backing-store storage object */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
-#ifdef USE_MAC_MEMMGR
-  /* For the Mac manager (jmemmac.c), we need: */
-  short temp_file;		/* file reference number to temp file */
-  FSSpec tempSpec;		/* the FSSpec for the temp file */
-  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
-#else
-  /* For a typical implementation with temp files, we need: */
-  FILE * temp_file;		/* stdio reference to temp file */
-  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
-#endif
-#endif
-} backing_store_info;
-
-
-/*
- * Initial opening of a backing-store object.  This must fill in the
- * read/write/close pointers in the object.  The read/write routines
- * may take an error exit if the specified maximum file size is exceeded.
- * (If jpeg_mem_available always returns a large value, this routine can
- * just take an error exit.)
- */
-
-EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
-					  backing_store_ptr info,
-					  long total_bytes_needed));
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.  jpeg_mem_init will be called before anything is
- * allocated (and, therefore, nothing in cinfo is of use except the error
- * manager pointer).  It should return a suitable default value for
- * max_memory_to_use; this may subsequently be overridden by the surrounding
- * application.  (Note that max_memory_to_use is only important if
- * jpeg_mem_available chooses to consult it ... no one else will.)
- * jpeg_mem_term may assume that all requested memory has been freed and that
- * all opened backing-store objects have been closed.
- */
-
-EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/cmake/externals/libjpeg/jmorecfg.h b/cmake/externals/libjpeg/jmorecfg.h
deleted file mode 100644
index 4638d6af2..000000000
--- a/cmake/externals/libjpeg/jmorecfg.h
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- * jmorecfg.h
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains additional configuration options that customize the
- * JPEG software for special applications or support machine-dependent
- * optimizations.  Most users will not need to touch this file.
- */
-
-
-/*
- * Define BITS_IN_JSAMPLE as either
- *   8   for 8-bit sample values (the usual setting)
- *   9   for 9-bit sample values
- *   10  for 10-bit sample values
- *   11  for 11-bit sample values
- *   12  for 12-bit sample values
- * Only 8, 9, 10, 11, and 12 bits sample data precision are supported for
- * full-feature DCT processing.  Further depths up to 16-bit may be added
- * later for the lossless modes of operation.
- * Run-time selection and conversion of data precision will be added later
- * and are currently not supported, sorry.
- * Exception:  The transcoding part (jpegtran) supports all settings in a
- * single instance, since it operates on the level of DCT coefficients and
- * not sample values.  The DCT coefficients are of the same type (16 bits)
- * in all cases (see below).
- */
-
-#define BITS_IN_JSAMPLE  8	/* use 8, 9, 10, 11, or 12 */
-
-
-/*
- * Maximum number of components (color channels) allowed in JPEG image.
- * To meet the letter of the JPEG spec, set this to 255.  However, darn
- * few applications need more than 4 channels (maybe 5 for CMYK + alpha
- * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
- * really short on memory.  (Each allowed component costs a hundred or so
- * bytes of storage, whether actually used in an image or not.)
- */
-
-#define MAX_COMPONENTS  10	/* maximum number of image components */
-
-
-/*
- * Basic data types.
- * You may need to change these if you have a machine with unusual data
- * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
- * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
- * but it had better be at least 16.
- */
-
-/* Representation of a single sample (pixel element value).
- * We frequently allocate large arrays of these, so it's important to keep
- * them small.  But if you have memory to burn and access to char or short
- * arrays is very slow on your hardware, you might want to change these.
- */
-
-#if BITS_IN_JSAMPLE == 8
-/* JSAMPLE should be the smallest type that will hold the values 0..255.
- * You can use a signed char by having GETJSAMPLE mask it with 0xFF.
- */
-
-#ifdef HAVE_UNSIGNED_CHAR
-
-typedef unsigned char JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JSAMPLE;
-#ifdef CHAR_IS_UNSIGNED
-#define GETJSAMPLE(value)  ((int) (value))
-#else
-#define GETJSAMPLE(value)  ((int) (value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
-#define MAXJSAMPLE	255
-#define CENTERJSAMPLE	128
-
-#endif /* BITS_IN_JSAMPLE == 8 */
-
-
-#if BITS_IN_JSAMPLE == 9
-/* JSAMPLE should be the smallest type that will hold the values 0..511.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	511
-#define CENTERJSAMPLE	256
-
-#endif /* BITS_IN_JSAMPLE == 9 */
-
-
-#if BITS_IN_JSAMPLE == 10
-/* JSAMPLE should be the smallest type that will hold the values 0..1023.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	1023
-#define CENTERJSAMPLE	512
-
-#endif /* BITS_IN_JSAMPLE == 10 */
-
-
-#if BITS_IN_JSAMPLE == 11
-/* JSAMPLE should be the smallest type that will hold the values 0..2047.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	2047
-#define CENTERJSAMPLE	1024
-
-#endif /* BITS_IN_JSAMPLE == 11 */
-
-
-#if BITS_IN_JSAMPLE == 12
-/* JSAMPLE should be the smallest type that will hold the values 0..4095.
- * On nearly all machines "short" will do nicely.
- */
-
-typedef short JSAMPLE;
-#define GETJSAMPLE(value)  ((int) (value))
-
-#define MAXJSAMPLE	4095
-#define CENTERJSAMPLE	2048
-
-#endif /* BITS_IN_JSAMPLE == 12 */
-
-
-/* Representation of a DCT frequency coefficient.
- * This should be a signed value of at least 16 bits; "short" is usually OK.
- * Again, we allocate large arrays of these, but you can change to int
- * if you have memory to burn and "short" is really slow.
- */
-
-typedef short JCOEF;
-
-
-/* Compressed datastreams are represented as arrays of JOCTET.
- * These must be EXACTLY 8 bits wide, at least once they are written to
- * external storage.  Note that when using the stdio data source/destination
- * managers, this is also the data type passed to fread/fwrite.
- */
-
-#ifdef HAVE_UNSIGNED_CHAR
-
-typedef unsigned char JOCTET;
-#define GETJOCTET(value)  (value)
-
-#else /* not HAVE_UNSIGNED_CHAR */
-
-typedef char JOCTET;
-#ifdef CHAR_IS_UNSIGNED
-#define GETJOCTET(value)  (value)
-#else
-#define GETJOCTET(value)  ((value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
-
-#endif /* HAVE_UNSIGNED_CHAR */
-
-
-/* These typedefs are used for various table entries and so forth.
- * They must be at least as wide as specified; but making them too big
- * won't cost a huge amount of memory, so we don't provide special
- * extraction code like we did for JSAMPLE.  (In other words, these
- * typedefs live at a different point on the speed/space tradeoff curve.)
- */
-
-/* UINT8 must hold at least the values 0..255. */
-
-#ifdef HAVE_UNSIGNED_CHAR
-typedef unsigned char UINT8;
-#else /* not HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
-typedef char UINT8;
-#else /* not CHAR_IS_UNSIGNED */
-typedef short UINT8;
-#endif /* CHAR_IS_UNSIGNED */
-#endif /* HAVE_UNSIGNED_CHAR */
-
-/* UINT16 must hold at least the values 0..65535. */
-
-#ifdef HAVE_UNSIGNED_SHORT
-typedef unsigned short UINT16;
-#else /* not HAVE_UNSIGNED_SHORT */
-typedef unsigned int UINT16;
-#endif /* HAVE_UNSIGNED_SHORT */
-
-/* INT16 must hold at least the values -32768..32767. */
-
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
-typedef short INT16;
-#endif
-
-/* INT32 must hold at least signed 32-bit values. */
-
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
-#ifndef _BASETSD_H_		/* Microsoft defines it in basetsd.h */
-#ifndef _BASETSD_H		/* MinGW is slightly different */
-#ifndef QGLOBAL_H		/* Qt defines it in qglobal.h */
-typedef long INT32;
-#endif
-#endif
-#endif
-#endif
-
-/* Datatype used for image dimensions.  The JPEG standard only supports
- * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
- * "unsigned int" is sufficient on all machines.  However, if you need to
- * handle larger images and you don't mind deviating from the spec, you
- * can change this datatype.
- */
-
-typedef unsigned int JDIMENSION;
-
-#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
-
-
-/* These macros are used in all function definitions and extern declarations.
- * You could modify them if you need to change function linkage conventions;
- * in particular, you'll need to do that to make the library a Windows DLL.
- * Another application is to make all functions global for use with debuggers
- * or code profilers that require it.
- */
-
-/* a function called through method pointers: */
-#define METHODDEF(type)		static type
-/* a function used only in its module: */
-#define LOCAL(type)		static type
-/* a function referenced thru EXTERNs: */
-#define GLOBAL(type)		type
-/* a reference to a GLOBAL function: */
-#define EXTERN(type)		extern type
-
-
-/* This macro is used to declare a "method", that is, a function pointer.
- * We want to supply prototype parameters if the compiler can cope.
- * Note that the arglist parameter must be parenthesized!
- * Again, you can customize this if you need special linkage keywords.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
-#else
-#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
-#endif
-
-
-/* The noreturn type identifier is used to declare functions
- * which cannot return.
- * Compilers can thus create more optimized code and perform
- * better checks for warnings and errors.
- * Static analyzer tools can make improved inferences about
- * execution paths and are prevented from giving false alerts.
- *
- * Unfortunately, the proposed specifications of corresponding
- * extensions in the Dec 2011 ISO C standard revision (C11),
- * GCC, MSVC, etc. are not viable.
- * Thus we introduce a user defined type to declare noreturn
- * functions at least for clarity.  A proper compiler would
- * have a suitable noreturn type to match in place of void.
- */
-
-#ifndef HAVE_NORETURN_T
-typedef void noreturn_t;
-#endif
-
-
-/* Here is the pseudo-keyword for declaring pointers that must be "far"
- * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
- * by just saying "FAR *" where such a pointer is needed.  In a few places
- * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
- */
-
-#ifndef FAR
-#ifdef NEED_FAR_POINTERS
-#define FAR  far
-#else
-#define FAR
-#endif
-#endif
-
-
-/*
- * On a few systems, type boolean and/or its values FALSE, TRUE may appear
- * in standard header files.  Or you may have conflicts with application-
- * specific header files that you want to include together with these files.
- * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
- */
-
-#ifndef HAVE_BOOLEAN
-#if defined FALSE || defined TRUE || defined QGLOBAL_H
-/* Qt3 defines FALSE and TRUE as "const" variables in qglobal.h */
-typedef int boolean;
-#ifndef FALSE			/* in case these macros already exist */
-#define FALSE	0		/* values of boolean */
-#endif
-#ifndef TRUE
-#define TRUE	1
-#endif
-#else
-typedef enum { FALSE = 0, TRUE = 1 } boolean;
-#endif
-#endif
-
-
-/*
- * The remaining options affect code selection within the JPEG library,
- * but they don't need to be visible to most applications using the library.
- * To minimize application namespace pollution, the symbols won't be
- * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
- */
-
-#ifdef JPEG_INTERNALS
-#define JPEG_INTERNAL_OPTIONS
-#endif
-
-#ifdef JPEG_INTERNAL_OPTIONS
-
-
-/*
- * These defines indicate whether to include various optional functions.
- * Undefining some of these symbols will produce a smaller but less capable
- * library.  Note that you can leave certain source files out of the
- * compilation/linking process if you've #undef'd the corresponding symbols.
- * (You may HAVE to do that if your compiler doesn't like null source files.)
- */
-
-/* Capability options common to encoder and decoder: */
-
-#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
-#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
-#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
-
-/* Encoder capability options: */
-
-#define C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
-#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN) */
-#define DCT_SCALING_SUPPORTED	/* Input rescaling via DCT? (Requires DCT_ISLOW) */
-#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
-/* Note: if you selected more than 8-bit data precision, it is dangerous to
- * turn off ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only
- * good for 8-bit precision, so arithmetic coding is recommended for higher
- * precision.  The Huffman encoder normally uses entropy optimization to
- * compute usable tables for higher precision.  Otherwise, you'll have to
- * supply different default Huffman tables.
- * The exact same statements apply for progressive JPEG: the default tables
- * don't work for progressive mode.  (This may get fixed, however.)
- */
-#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
-
-/* Decoder capability options: */
-
-#define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
-#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN) */
-#define IDCT_SCALING_SUPPORTED	/* Output rescaling via IDCT? (Requires DCT_ISLOW) */
-#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
-#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
-#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
-#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
-#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
-#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
-
-/* more capability options later, no doubt */
-
-
-/*
- * Ordering of RGB data in scanlines passed to or from the application.
- * If your application wants to deal with data in the order B,G,R, just
- * #define JPEG_USE_RGB_CUSTOM in jconfig.h, or define your own custom
- * order in jconfig.h and #define JPEG_HAVE_RGB_CUSTOM.
- * You can also deal with formats such as R,G,B,X (one extra byte per pixel)
- * by changing RGB_PIXELSIZE.
- * Note that changing the offsets will also change
- * the order in which colormap data is organized.
- * RESTRICTIONS:
- * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
- * 2. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
- *    is not 3 (they don't understand about dummy color components!).
- *    So you can't use color quantization if you change that value.
- */
-
-#ifndef JPEG_HAVE_RGB_CUSTOM
-#ifdef JPEG_USE_RGB_CUSTOM
-#define RGB_RED		2	/* Offset of Red in an RGB scanline element */
-#define RGB_GREEN	1	/* Offset of Green */
-#define RGB_BLUE	0	/* Offset of Blue */
-#else
-#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
-#define RGB_GREEN	1	/* Offset of Green */
-#define RGB_BLUE	2	/* Offset of Blue */
-#endif
-#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
-#endif
-
-
-/* Definitions for speed-related optimizations. */
-
-
-/* If your compiler supports inline functions, define INLINE
- * as the inline keyword; otherwise define it as empty.
- */
-
-#ifndef INLINE
-#ifdef __GNUC__			/* for instance, GNU C knows about inline */
-#define INLINE __inline__
-#endif
-#ifndef INLINE
-#define INLINE			/* default is to define it as empty */
-#endif
-#endif
-
-
-/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
- * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
- * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
- */
-
-#ifndef MULTIPLIER
-#define MULTIPLIER  int		/* type for fastest integer multiply */
-#endif
-
-
-/* FAST_FLOAT should be either float or double, whichever is done faster
- * by your compiler.  (Note that this type is only used in the floating point
- * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
- * Typically, float is faster in ANSI C compilers, while double is faster in
- * pre-ANSI compilers (because they insist on converting to double anyway).
- * The code below therefore chooses float if we have ANSI-style prototypes.
- */
-
-#ifndef FAST_FLOAT
-#ifdef HAVE_PROTOTYPES
-#define FAST_FLOAT  float
-#else
-#define FAST_FLOAT  double
-#endif
-#endif
-
-#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/cmake/externals/libjpeg/jpegint.h b/cmake/externals/libjpeg/jpegint.h
deleted file mode 100644
index 3528bff5b..000000000
--- a/cmake/externals/libjpeg/jpegint.h
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * jpegint.h
- *
- * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides common declarations for the various JPEG modules.
- * These declarations are considered internal to the JPEG library; most
- * applications using the library shouldn't need to include this file.
- */
-
-
-/* Declarations for both compression & decompression */
-
-typedef enum {			/* Operating modes for buffer controllers */
-	JBUF_PASS_THRU,		/* Plain stripwise operation */
-	/* Remaining modes require a full-image buffer to have been created */
-	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
-	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
-	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
-} J_BUF_MODE;
-
-/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
-#define CSTATE_START	100	/* after create_compress */
-#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
-#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
-#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
-#define DSTATE_START	200	/* after create_decompress */
-#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
-#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
-#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
-#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
-#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
-#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
-#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
-#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
-#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
-#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
-
-
-/* Declarations for compression modules */
-
-/* Master control module */
-struct jpeg_comp_master {
-  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean call_pass_startup;	/* True if pass_startup must be called */
-  boolean is_last_pass;		/* True during last pass */
-};
-
-/* Main buffer control (downsampled-data buffer) */
-struct jpeg_c_main_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_compress_ptr cinfo,
-			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			       JDIMENSION in_rows_avail));
-};
-
-/* Compression preprocessing (downsampling input buffer control) */
-struct jpeg_c_prep_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
-				   JSAMPARRAY input_buf,
-				   JDIMENSION *in_row_ctr,
-				   JDIMENSION in_rows_avail,
-				   JSAMPIMAGE output_buf,
-				   JDIMENSION *out_row_group_ctr,
-				   JDIMENSION out_row_groups_avail));
-};
-
-/* Coefficient buffer control */
-struct jpeg_c_coef_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
-				   JSAMPIMAGE input_buf));
-};
-
-/* Colorspace conversion */
-struct jpeg_color_converter {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
-				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-				JDIMENSION output_row, int num_rows));
-};
-
-/* Downsampling */
-struct jpeg_downsampler {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, downsample, (j_compress_ptr cinfo,
-			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-			     JSAMPIMAGE output_buf,
-			     JDIMENSION out_row_group_index));
-
-  boolean need_context_rows;	/* TRUE if need rows above & below */
-};
-
-/* Forward DCT (also controls coefficient quantization) */
-typedef JMETHOD(void, forward_DCT_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		 JDIMENSION start_col, JDIMENSION num_blocks));
-
-struct jpeg_forward_dct {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  /* It is useful to allow each component to have a separate FDCT method. */
-  forward_DCT_ptr forward_DCT[MAX_COMPONENTS];
-};
-
-/* Entropy encoding */
-struct jpeg_entropy_encoder {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
-  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKARRAY MCU_data));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
-};
-
-/* Marker writing */
-struct jpeg_marker_writer {
-  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
-  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
-  /* These routines are exported to allow insertion of extra markers */
-  /* Probably only COM and APPn markers should be written this way */
-  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
-				      unsigned int datalen));
-  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
-};
-
-
-/* Declarations for decompression modules */
-
-/* Master control module */
-struct jpeg_decomp_master {
-  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
-};
-
-/* Input control module */
-struct jpeg_input_controller {
-  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
-  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
-
-  /* State variables made visible to other modules */
-  boolean has_multiple_scans;	/* True if file has multiple scans */
-  boolean eoi_reached;		/* True when EOI has been consumed */
-};
-
-/* Main buffer control (downsampled-data buffer) */
-struct jpeg_d_main_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
-			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			       JDIMENSION out_rows_avail));
-};
-
-/* Coefficient buffer control */
-struct jpeg_d_coef_controller {
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
-				 JSAMPIMAGE output_buf));
-  /* Pointer to array of coefficient virtual arrays, or NULL if none */
-  jvirt_barray_ptr *coef_arrays;
-};
-
-/* Decompression postprocessing (color quantization buffer control) */
-struct jpeg_d_post_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
-				    JSAMPIMAGE input_buf,
-				    JDIMENSION *in_row_group_ctr,
-				    JDIMENSION in_row_groups_avail,
-				    JSAMPARRAY output_buf,
-				    JDIMENSION *out_row_ctr,
-				    JDIMENSION out_rows_avail));
-};
-
-/* Marker reading & parsing */
-struct jpeg_marker_reader {
-  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
-  /* Read markers until SOS or EOI.
-   * Returns same codes as are defined for jpeg_consume_input:
-   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
-   */
-  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
-  /* Read a restart marker --- exported for use by entropy decoder only */
-  jpeg_marker_parser_method read_restart_marker;
-
-  /* State of marker reader --- nominally internal, but applications
-   * supplying COM or APPn handlers might like to know the state.
-   */
-  boolean saw_SOI;		/* found SOI? */
-  boolean saw_SOF;		/* found SOF? */
-  int next_restart_num;		/* next restart number expected (0-7) */
-  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
-};
-
-/* Entropy decoding */
-struct jpeg_entropy_decoder {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, JBLOCKARRAY MCU_data));
-  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
-};
-
-/* Inverse DCT (also performs dequantization) */
-typedef JMETHOD(void, inverse_DCT_method_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col));
-
-struct jpeg_inverse_dct {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  /* It is useful to allow each component to have a separate IDCT method. */
-  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
-};
-
-/* Upsampling (note that upsampler must also call color converter) */
-struct jpeg_upsampler {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf,
-			   JDIMENSION *in_row_group_ctr,
-			   JDIMENSION in_row_groups_avail,
-			   JSAMPARRAY output_buf,
-			   JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail));
-
-  boolean need_context_rows;	/* TRUE if need rows above & below */
-};
-
-/* Colorspace conversion */
-struct jpeg_color_deconverter {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
-				JSAMPIMAGE input_buf, JDIMENSION input_row,
-				JSAMPARRAY output_buf, int num_rows));
-};
-
-/* Color quantization or color precision reduction */
-struct jpeg_color_quantizer {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
-  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
-				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
-				 int num_rows));
-  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
-};
-
-
-/* Definition of range extension bits for decompression processes.
- * See the comments with prepare_range_limit_table (in jdmaster.c)
- * for more info.
- * The recommended default value for normal applications is 2.
- * Applications with special requirements may use a different value.
- * For example, Ghostscript wants to use 3 for proper handling of
- * wacky images with oversize coefficient values.
- */
-
-#define RANGE_BITS	2
-#define RANGE_CENTER	(CENTERJSAMPLE << RANGE_BITS)
-
-
-/* Miscellaneous useful macros */
-
-#undef MAX
-#define MAX(a,b)	((a) > (b) ? (a) : (b))
-#undef MIN
-#define MIN(a,b)	((a) < (b) ? (a) : (b))
-
-
-/* We assume that right shift corresponds to signed division by 2 with
- * rounding towards minus infinity.  This is correct for typical "arithmetic
- * shift" instructions that shift in copies of the sign bit.  But some
- * C compilers implement >> with an unsigned shift.  For these machines you
- * must define RIGHT_SHIFT_IS_UNSIGNED.
- * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
- * It is only applied with constant shift counts.  SHIFT_TEMPS must be
- * included in the variables of any routine using RIGHT_SHIFT.
- */
-
-#ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define SHIFT_TEMPS	INT32 shift_temp;
-#define RIGHT_SHIFT(x,shft)  \
-	((shift_temp = (x)) < 0 ? \
-	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
-	 (shift_temp >> (shft)))
-#else
-#define SHIFT_TEMPS
-#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
-#endif
-
-/* Descale and correctly round an INT32 value that's scaled by N bits.
- * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
- * the fudge factor is correct for either sign of X.
- */
-
-#define DESCALE(x,n)	RIGHT_SHIFT((x) + ((INT32) 1 << ((n)-1)), n)
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_compress_master	jICompress
-#define jinit_c_master_control	jICMaster
-#define jinit_c_main_controller	jICMainC
-#define jinit_c_prep_controller	jICPrepC
-#define jinit_c_coef_controller	jICCoefC
-#define jinit_color_converter	jICColor
-#define jinit_downsampler	jIDownsampler
-#define jinit_forward_dct	jIFDCT
-#define jinit_huff_encoder	jIHEncoder
-#define jinit_arith_encoder	jIAEncoder
-#define jinit_marker_writer	jIMWriter
-#define jinit_master_decompress	jIDMaster
-#define jinit_d_main_controller	jIDMainC
-#define jinit_d_coef_controller	jIDCoefC
-#define jinit_d_post_controller	jIDPostC
-#define jinit_input_controller	jIInCtlr
-#define jinit_marker_reader	jIMReader
-#define jinit_huff_decoder	jIHDecoder
-#define jinit_arith_decoder	jIADecoder
-#define jinit_inverse_dct	jIIDCT
-#define jinit_upsampler		jIUpsampler
-#define jinit_color_deconverter	jIDColor
-#define jinit_1pass_quantizer	jI1Quant
-#define jinit_2pass_quantizer	jI2Quant
-#define jinit_merged_upsampler	jIMUpsampler
-#define jinit_memory_mgr	jIMemMgr
-#define jdiv_round_up		jDivRound
-#define jround_up		jRound
-#define jzero_far		jZeroFar
-#define jcopy_sample_rows	jCopySamples
-#define jcopy_block_row		jCopyBlocks
-#define jpeg_zigzag_order	jZIGTable
-#define jpeg_natural_order	jZAGTable
-#define jpeg_natural_order7	jZAG7Table
-#define jpeg_natural_order6	jZAG6Table
-#define jpeg_natural_order5	jZAG5Table
-#define jpeg_natural_order4	jZAG4Table
-#define jpeg_natural_order3	jZAG3Table
-#define jpeg_natural_order2	jZAG2Table
-#define jpeg_aritab		jAriTab
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
- * and coefficient-block arrays.  This won't work on 80x86 because the arrays
- * are FAR and we're assuming a small-pointer memory model.  However, some
- * DOS compilers provide far-pointer versions of memcpy() and memset() even
- * in the small-model libraries.  These will be used if USE_FMEM is defined.
- * Otherwise, the routines in jutils.c do it the hard way.
- */
-
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
-#define FMEMZERO(target,size)	MEMZERO(target,size)
-#else				/* 80x86 case */
-#ifdef USE_FMEM
-#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
-#else
-EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
-#define FMEMZERO(target,size)	jzero_far(target, size)
-#endif
-#endif
-
-
-/* Compression module initialization routines */
-EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
-					 boolean transcode_only));
-EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
-/* Decompression module initialization routines */
-EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
-/* Memory manager initialization */
-EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
-
-/* Utility routines in jutils.c */
-EXTERN(long) jdiv_round_up JPP((long a, long b));
-EXTERN(long) jround_up JPP((long a, long b));
-EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array,
-				    JSAMPARRAY output_array,
-				    int num_rows, JDIMENSION num_cols));
-EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
-				  JDIMENSION num_blocks));
-/* Constant tables in jutils.c */
-#if 0				/* This table is not actually needed in v6a */
-extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
-#endif
-extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
-extern const int jpeg_natural_order7[]; /* zz to natural order for 7x7 block */
-extern const int jpeg_natural_order6[]; /* zz to natural order for 6x6 block */
-extern const int jpeg_natural_order5[]; /* zz to natural order for 5x5 block */
-extern const int jpeg_natural_order4[]; /* zz to natural order for 4x4 block */
-extern const int jpeg_natural_order3[]; /* zz to natural order for 3x3 block */
-extern const int jpeg_natural_order2[]; /* zz to natural order for 2x2 block */
-
-/* Arithmetic coding probability estimation tables in jaricom.c */
-extern const INT32 jpeg_aritab[];
-
-/* Suppress undefined-structure complaints if necessary. */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-#endif
-#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/cmake/externals/libjpeg/jpeglib.h b/cmake/externals/libjpeg/jpeglib.h
deleted file mode 100644
index e7e15ab2c..000000000
--- a/cmake/externals/libjpeg/jpeglib.h
+++ /dev/null
@@ -1,1183 +0,0 @@
-/*
- * jpeglib.h
- *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2002-2022 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the application interface for the JPEG library.
- * Most applications using the library need only include this file,
- * and perhaps jerror.h if they want to know the exact error codes.
- */
-
-#ifndef JPEGLIB_H
-#define JPEGLIB_H
-
-/*
- * First we include the configuration files that record how this
- * installation of the JPEG library is set up.  jconfig.h can be
- * generated automatically for many systems.  jmorecfg.h contains
- * manual configuration options that most people need not worry about.
- */
-
-#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
-#include "jconfig.h"		/* widely used configuration options */
-#endif
-#include "jmorecfg.h"		/* seldom changed options */
-
-
-#ifdef __cplusplus
-#ifndef DONT_USE_EXTERN_C
-extern "C" {
-#endif
-#endif
-
-/* Version IDs for the JPEG library.
- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 90".
- */
-
-#define JPEG_LIB_VERSION        90	/* Compatibility version 9.0 */
-#define JPEG_LIB_VERSION_MAJOR  9
-#define JPEG_LIB_VERSION_MINOR  6
-
-
-/* Various constants determining the sizes of things.
- * All of these are specified by the JPEG standard,
- * so don't change them if you want to be compatible.
- */
-
-#define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
-#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
-#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
-#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
-#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
-#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
-#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
-/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
- * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
- * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
- * to handle it.  We even let you do this from the jconfig.h file.  However,
- * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
- * sometimes emits noncompliant files doesn't mean you should too.
- */
-#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
-#ifndef D_MAX_BLOCKS_IN_MCU
-#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
-#endif
-
-
-/* Data structures for images (arrays of samples and of DCT coefficients).
- * On 80x86 machines, the image arrays are too big for near pointers,
- * but the pointer arrays can fit in near memory.
- */
-
-typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
-typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
-typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
-
-typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
-typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
-typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
-typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
-
-typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
-
-
-/* Types for JPEG compression parameters and working tables. */
-
-
-/* DCT coefficient quantization tables. */
-
-typedef struct {
-  /* This array gives the coefficient quantizers in natural array order
-   * (not the zigzag order in which they are stored in a JPEG DQT marker).
-   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
-   */
-  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
-  /* This field is used only during compression.  It's initialized FALSE when
-   * the table is created, and set TRUE when it's been output to the file.
-   * You could suppress output of a table by setting this to TRUE.
-   * (See jpeg_suppress_tables for an example.)
-   */
-  boolean sent_table;		/* TRUE when table has been output */
-} JQUANT_TBL;
-
-
-/* Huffman coding tables. */
-
-typedef struct {
-  /* These two fields directly represent the contents of a JPEG DHT marker */
-  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
-				/* length k bits; bits[0] is unused */
-  UINT8 huffval[256];		/* The symbols, in order of incr code length */
-  /* This field is used only during compression.  It's initialized FALSE when
-   * the table is created, and set TRUE when it's been output to the file.
-   * You could suppress output of a table by setting this to TRUE.
-   * (See jpeg_suppress_tables for an example.)
-   */
-  boolean sent_table;		/* TRUE when table has been output */
-} JHUFF_TBL;
-
-
-/* Basic info about one component (color channel). */
-
-typedef struct {
-  /* These values are fixed over the whole image. */
-  /* For compression, they must be supplied by parameter setup; */
-  /* for decompression, they are read from the SOF marker. */
-  int component_id;		/* identifier for this component (0..255) */
-  int component_index;		/* its index in SOF or cinfo->comp_info[] */
-  int h_samp_factor;		/* horizontal sampling factor (1..4) */
-  int v_samp_factor;		/* vertical sampling factor (1..4) */
-  int quant_tbl_no;		/* quantization table selector (0..3) */
-  /* These values may vary between scans. */
-  /* For compression, they must be supplied by parameter setup; */
-  /* for decompression, they are read from the SOS marker. */
-  /* The decompressor output side may not use these variables. */
-  int dc_tbl_no;		/* DC entropy table selector (0..3) */
-  int ac_tbl_no;		/* AC entropy table selector (0..3) */
-
-  /* Remaining fields should be treated as private by applications. */
-
-  /* These values are computed during compression or decompression startup: */
-  /* Component's size in DCT blocks.
-   * Any dummy blocks added to complete an MCU are not counted; therefore
-   * these values do not depend on whether a scan is interleaved or not.
-   */
-  JDIMENSION width_in_blocks;
-  JDIMENSION height_in_blocks;
-  /* Size of a DCT block in samples,
-   * reflecting any scaling we choose to apply during the DCT step.
-   * Values from 1 to 16 are supported.
-   * Note that different components may receive different DCT scalings.
-   */
-  int DCT_h_scaled_size;
-  int DCT_v_scaled_size;
-  /* The downsampled dimensions are the component's actual, unpadded number
-   * of samples at the main buffer (preprocessing/compression interface);
-   * DCT scaling is included, so
-   * downsampled_width =
-   *   ceil(image_width * Hi/Hmax * DCT_h_scaled_size/block_size)
-   * and similarly for height.
-   */
-  JDIMENSION downsampled_width;	 /* actual width in samples */
-  JDIMENSION downsampled_height; /* actual height in samples */
-  /* For decompression, in cases where some of the components will be
-   * ignored (eg grayscale output from YCbCr image), we can skip most
-   * computations for the unused components.
-   * For compression, some of the components will need further quantization
-   * scale by factor of 2 after DCT (eg BG_YCC output from normal RGB input).
-   * The field is first set TRUE for decompression, FALSE for compression
-   * in initial_setup, and then adapted in color conversion setup.
-   */
-  boolean component_needed;
-
-  /* These values are computed before starting a scan of the component. */
-  /* The decompressor output side may not use these variables. */
-  int MCU_width;		/* number of blocks per MCU, horizontally */
-  int MCU_height;		/* number of blocks per MCU, vertically */
-  int MCU_blocks;		/* MCU_width * MCU_height */
-  int MCU_sample_width;	/* MCU width in samples: MCU_width * DCT_h_scaled_size */
-  int last_col_width;		/* # of non-dummy blocks across in last MCU */
-  int last_row_height;		/* # of non-dummy blocks down in last MCU */
-
-  /* Saved quantization table for component; NULL if none yet saved.
-   * See jdinput.c comments about the need for this information.
-   * This field is currently used only for decompression.
-   */
-  JQUANT_TBL * quant_table;
-
-  /* Private per-component storage for DCT or IDCT subsystem. */
-  void * dct_table;
-} jpeg_component_info;
-
-
-/* The script for encoding a multiple-scan file is an array of these: */
-
-typedef struct {
-  int comps_in_scan;		/* number of components encoded in this scan */
-  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
-  int Ss, Se;			/* progressive JPEG spectral selection parms */
-  int Ah, Al;			/* progressive JPEG successive approx. parms */
-} jpeg_scan_info;
-
-/* The decompressor can save APPn and COM markers in a list of these: */
-
-typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
-
-struct jpeg_marker_struct {
-  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
-  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
-  unsigned int original_length;	/* # bytes of data in the file */
-  unsigned int data_length;	/* # bytes of data saved at data[] */
-  JOCTET FAR * data;		/* the data contained in the marker */
-  /* the marker length word is not counted in data_length or original_length */
-};
-
-/* Known color spaces. */
-
-typedef enum {
-	JCS_UNKNOWN,		/* error/unspecified */
-	JCS_GRAYSCALE,		/* monochrome */
-	JCS_RGB,		/* red/green/blue, standard RGB (sRGB) */
-	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV), standard YCC */
-	JCS_CMYK,		/* C/M/Y/K */
-	JCS_YCCK,		/* Y/Cb/Cr/K */
-	JCS_BG_RGB,		/* big gamut red/green/blue, bg-sRGB */
-	JCS_BG_YCC		/* big gamut Y/Cb/Cr, bg-sYCC */
-} J_COLOR_SPACE;
-
-/* Supported color transforms. */
-
-typedef enum {
-	JCT_NONE           = 0,
-	JCT_SUBTRACT_GREEN = 1
-} J_COLOR_TRANSFORM;
-
-/* DCT/IDCT algorithm options. */
-
-typedef enum {
-	JDCT_ISLOW,		/* slow but accurate integer algorithm */
-	JDCT_IFAST,		/* faster, less accurate integer method */
-	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
-} J_DCT_METHOD;
-
-#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
-#define JDCT_DEFAULT  JDCT_ISLOW
-#endif
-#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
-#define JDCT_FASTEST  JDCT_IFAST
-#endif
-
-/* Dithering options for decompression. */
-
-typedef enum {
-	JDITHER_NONE,		/* no dithering */
-	JDITHER_ORDERED,	/* simple ordered dither */
-	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
-} J_DITHER_MODE;
-
-
-/* Common fields between JPEG compression and decompression master structs. */
-
-#define jpeg_common_fields \
-  struct jpeg_error_mgr * err;	/* Error handler module */\
-  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
-  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
-  void * client_data;		/* Available for use by application */\
-  boolean is_decompressor;	/* So common code can tell which is which */\
-  int global_state		/* For checking call sequence validity */
-
-/* Routines that are to be used by both halves of the library are declared
- * to receive a pointer to this structure.  There are no actual instances of
- * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
- */
-struct jpeg_common_struct {
-  jpeg_common_fields;		/* Fields common to both master struct types */
-  /* Additional fields follow in an actual jpeg_compress_struct or
-   * jpeg_decompress_struct.  All three structs must agree on these
-   * initial fields!  (This would be a lot cleaner in C++.)
-   */
-};
-
-typedef struct jpeg_common_struct * j_common_ptr;
-typedef struct jpeg_compress_struct * j_compress_ptr;
-typedef struct jpeg_decompress_struct * j_decompress_ptr;
-
-
-/* Master record for a compression instance */
-
-struct jpeg_compress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
-
-  /* Destination for compressed data */
-  struct jpeg_destination_mgr * dest;
-
-  /* Description of source image --- these fields must be filled in by
-   * outer application before starting compression.  in_color_space must
-   * be correct before you can even call jpeg_set_defaults().
-   */
-
-  JDIMENSION image_width;	/* input image width */
-  JDIMENSION image_height;	/* input image height */
-  int input_components;		/* # of color components in input image */
-  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
-
-  double input_gamma;		/* image gamma of input image */
-
-  /* Compression parameters --- these fields must be set before calling
-   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
-   * initialize everything to reasonable defaults, then changing anything
-   * the application specifically wants to change.  That way you won't get
-   * burnt when new parameters are added.  Also note that there are several
-   * helper routines to simplify changing parameters.
-   */
-
-  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
-
-  JDIMENSION jpeg_width;	/* scaled JPEG image width */
-  JDIMENSION jpeg_height;	/* scaled JPEG image height */
-  /* Dimensions of actual JPEG image that will be written to file,
-   * derived from input dimensions by scaling factors above.
-   * These fields are computed by jpeg_start_compress().
-   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
-   * in advance of calling jpeg_start_compress().
-   */
-
-  int data_precision;		/* bits of precision in image data */
-
-  int num_components;		/* # of color components in JPEG image */
-  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
-
-  jpeg_component_info * comp_info;
-  /* comp_info[i] describes component that appears i'th in SOF */
-
-  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
-  int q_scale_factor[NUM_QUANT_TBLS];
-  /* ptrs to coefficient quantization tables, or NULL if not defined,
-   * and corresponding scale factors (percentage, initialized 100).
-   */
-
-  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  /* ptrs to Huffman coding tables, or NULL if not defined */
-
-  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
-  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
-  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
-
-  int num_scans;		/* # of entries in scan_info array */
-  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
-  /* The default value of scan_info is NULL, which causes a single-scan
-   * sequential JPEG file to be emitted.  To create a multi-scan file,
-   * set num_scans and scan_info to point to an array of scan definitions.
-   */
-
-  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
-  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
-  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
-  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
-
-  /* The restart interval can be specified in absolute MCUs by setting
-   * restart_interval, or in MCU rows by setting restart_in_rows
-   * (in which case the correct restart_interval will be figured
-   * for each scan).
-   */
-  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
-  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
-
-  /* Parameters controlling emission of special markers. */
-
-  boolean write_JFIF_header;	/* should a JFIF marker be written? */
-  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
-  UINT8 JFIF_minor_version;
-  /* These three values are not used by the JPEG code, merely copied */
-  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
-  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
-  /* ratio is defined by X_density/Y_density even when density_unit=0. */
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
-
-  J_COLOR_TRANSFORM color_transform;
-  /* Color transform identifier, writes LSE marker if nonzero */
-
-  /* State variable: index of next scanline to be written to
-   * jpeg_write_scanlines().  Application may use this to control its
-   * processing loop, e.g., "while (next_scanline < image_height)".
-   */
-
-  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
-
-  /* Remaining fields are known throughout compressor, but generally
-   * should not be touched by a surrounding application.
-   */
-
-  /*
-   * These fields are computed during compression startup
-   */
-  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
-
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
-
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
-  /* The coefficient controller receives data in units of MCU rows as defined
-   * for fully interleaved scans (whether the JPEG file is interleaved or not).
-   * There are v_samp_factor * DCT_v_scaled_size sample rows of each component
-   * in an "iMCU" (interleaved MCU) row.
-   */
-
-  /*
-   * These fields are valid during any one scan.
-   * They describe the components and MCUs actually appearing in the scan.
-   */
-  int comps_in_scan;		/* # of JPEG components in this scan */
-  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
-  /* *cur_comp_info[i] describes component that appears i'th in SOS */
-
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
-  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
-  /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
-
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
-
-  int block_size;		/* the basic DCT block size: 1..16 */
-  const int * natural_order;	/* natural-order position array */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) */
-
-  /*
-   * Links to compression subobjects (methods and private variables of modules)
-   */
-  struct jpeg_comp_master * master;
-  struct jpeg_c_main_controller * main;
-  struct jpeg_c_prep_controller * prep;
-  struct jpeg_c_coef_controller * coef;
-  struct jpeg_marker_writer * marker;
-  struct jpeg_color_converter * cconvert;
-  struct jpeg_downsampler * downsample;
-  struct jpeg_forward_dct * fdct;
-  struct jpeg_entropy_encoder * entropy;
-  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
-  int script_space_size;
-};
-
-
-/* Master record for a decompression instance */
-
-struct jpeg_decompress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
-
-  /* Source of compressed data */
-  struct jpeg_source_mgr * src;
-
-  /* Basic description of image --- filled in by jpeg_read_header(). */
-  /* Application may inspect these values to decide how to process image. */
-
-  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
-  JDIMENSION image_height;	/* nominal image height */
-  int num_components;		/* # of color components in JPEG image */
-  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
-
-  /* Decompression processing parameters --- these fields must be set before
-   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
-   * them to default values.
-   */
-
-  J_COLOR_SPACE out_color_space; /* colorspace for output */
-
-  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
-
-  double output_gamma;		/* image gamma wanted in output */
-
-  boolean buffered_image;	/* TRUE=multiple output passes */
-  boolean raw_data_out;		/* TRUE=downsampled data wanted */
-
-  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
-  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
-  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
-
-  boolean quantize_colors;	/* TRUE=colormapped output wanted */
-  /* the following are ignored if not quantize_colors: */
-  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
-  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
-  int desired_number_of_colors;	/* max # colors to use in created colormap */
-  /* these are significant only in buffered-image mode: */
-  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
-  boolean enable_external_quant;/* enable future use of external colormap */
-  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
-
-  /* Description of actual output image that will be returned to application.
-   * These fields are computed by jpeg_start_decompress().
-   * You can also use jpeg_calc_output_dimensions() to determine these values
-   * in advance of calling jpeg_start_decompress().
-   */
-
-  JDIMENSION output_width;	/* scaled image width */
-  JDIMENSION output_height;	/* scaled image height */
-  int out_color_components;	/* # of color components in out_color_space */
-  int output_components;	/* # of color components returned */
-  /* output_components is 1 (a colormap index) when quantizing colors;
-   * otherwise it equals out_color_components.
-   */
-  int rec_outbuf_height;	/* min recommended height of scanline buffer */
-  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
-   * high, space and time will be wasted due to unnecessary data copying.
-   * Usually rec_outbuf_height will be 1 or 2, at most 4.
-   */
-
-  /* When quantizing colors, the output colormap is described by these fields.
-   * The application can supply a colormap by setting colormap non-NULL before
-   * calling jpeg_start_decompress; otherwise a colormap is created during
-   * jpeg_start_decompress or jpeg_start_output.
-   * The map has out_color_components rows and actual_number_of_colors columns.
-   */
-  int actual_number_of_colors;	/* number of entries in use */
-  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
-
-  /* State variables: these variables indicate the progress of decompression.
-   * The application may examine these but must not modify them.
-   */
-
-  /* Row index of next scanline to be read from jpeg_read_scanlines().
-   * Application may use this to control its processing loop, e.g.,
-   * "while (output_scanline < output_height)".
-   */
-  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
-
-  /* Current input scan number and number of iMCU rows completed in scan.
-   * These indicate the progress of the decompressor input side.
-   */
-  int input_scan_number;	/* Number of SOS markers seen so far */
-  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
-
-  /* The "output scan number" is the notional scan being displayed by the
-   * output side.  The decompressor will not allow output scan/row number
-   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
-   */
-  int output_scan_number;	/* Nominal scan number being displayed */
-  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
-
-  /* Current progression status.  coef_bits[c][i] indicates the precision
-   * with which component c's DCT coefficient i (in zigzag order) is known.
-   * It is -1 when no data has yet been received, otherwise it is the point
-   * transform (shift) value for the most recent scan of the coefficient
-   * (thus, 0 at completion of the progression).
-   * This pointer is NULL when reading a non-progressive file.
-   */
-  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
-
-  /* Internal JPEG parameters --- the application usually need not look at
-   * these fields.  Note that the decompressor output side may not use
-   * any parameters that can change between scans.
-   */
-
-  /* Quantization and Huffman tables are carried forward across input
-   * datastreams when processing abbreviated JPEG datastreams.
-   */
-
-  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
-  /* ptrs to coefficient quantization tables, or NULL if not defined */
-
-  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
-  /* ptrs to Huffman coding tables, or NULL if not defined */
-
-  /* These parameters are never carried across datastreams, since they
-   * are given in SOF/SOS markers or defined to be reset by SOI.
-   */
-
-  int data_precision;		/* bits of precision in image data */
-
-  jpeg_component_info * comp_info;
-  /* comp_info[i] describes component that appears i'th in SOF */
-
-  boolean is_baseline;		/* TRUE if Baseline SOF0 encountered */
-  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-
-  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
-  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
-  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
-
-  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
-
-  /* These fields record data obtained from optional markers recognized by
-   * the JPEG library.
-   */
-  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
-  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
-  UINT8 JFIF_major_version;	/* JFIF version number */
-  UINT8 JFIF_minor_version;
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
-  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
-
-  J_COLOR_TRANSFORM color_transform;
-  /* Color transform identifier derived from LSE marker, otherwise zero */
-
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
-
-  /* Aside from the specific data retained from APPn markers known to the
-   * library, the uninterpreted contents of any or all APPn and COM markers
-   * can be saved in a list for examination by the application.
-   */
-  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
-
-  /* Remaining fields are known throughout decompressor, but generally
-   * should not be touched by a surrounding application.
-   */
-
-  /*
-   * These fields are computed during decompression startup
-   */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
-
-  int min_DCT_h_scaled_size;	/* smallest DCT_h_scaled_size of any component */
-  int min_DCT_v_scaled_size;	/* smallest DCT_v_scaled_size of any component */
-
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
-  /* The coefficient controller's input and output progress is measured in
-   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
-   * in fully interleaved JPEG scans, but are used whether the scan is
-   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
-   * rows of each component.  Therefore, the IDCT output contains
-   * v_samp_factor * DCT_v_scaled_size sample rows of a component per iMCU row.
-   */
-
-  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
-
-  /*
-   * These fields are valid during any one scan.
-   * They describe the components and MCUs actually appearing in the scan.
-   * Note that the decompressor output side must not use these fields.
-   */
-  int comps_in_scan;		/* # of JPEG components in this scan */
-  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
-  /* *cur_comp_info[i] describes component that appears i'th in SOS */
-
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
-  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
-  /* MCU_membership[i] is index in cur_comp_info of component owning */
-  /* i'th block in an MCU */
-
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
-
-  /* These fields are derived from Se of first SOS marker.
-   */
-  int block_size;		/* the basic DCT block size: 1..16 */
-  const int * natural_order; /* natural-order position array for entropy decode */
-  int lim_Se;			/* min( Se, DCTSIZE2-1 ) for entropy decode */
-
-  /* This field is shared between entropy decoder and marker parser.
-   * It is either zero or the code of a JPEG marker that has been
-   * read from the data source, but has not yet been processed.
-   */
-  int unread_marker;
-
-  /*
-   * Links to decompression subobjects (methods, private variables of modules)
-   */
-  struct jpeg_decomp_master * master;
-  struct jpeg_d_main_controller * main;
-  struct jpeg_d_coef_controller * coef;
-  struct jpeg_d_post_controller * post;
-  struct jpeg_input_controller * inputctl;
-  struct jpeg_marker_reader * marker;
-  struct jpeg_entropy_decoder * entropy;
-  struct jpeg_inverse_dct * idct;
-  struct jpeg_upsampler * upsample;
-  struct jpeg_color_deconverter * cconvert;
-  struct jpeg_color_quantizer * cquantize;
-};
-
-
-/* "Object" declarations for JPEG modules that may be supplied or called
- * directly by the surrounding application.
- * As with all objects in the JPEG library, these structs only define the
- * publicly visible methods and state variables of a module.  Additional
- * private fields may exist after the public ones.
- */
-
-
-/* Error handler object */
-
-struct jpeg_error_mgr {
-  /* Error exit handler: does not return to caller */
-  JMETHOD(noreturn_t, error_exit, (j_common_ptr cinfo));
-  /* Conditionally emit a trace or warning message */
-  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
-  /* Routine that actually outputs a trace or error message */
-  JMETHOD(void, output_message, (j_common_ptr cinfo));
-  /* Format a message string for the most recent JPEG error or message */
-  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
-#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
-  /* Reset error state variables at start of a new image */
-  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
-
-  /* The message ID code and any parameters are saved here.
-   * A message can have one string parameter or up to 8 int parameters.
-   */
-  int msg_code;
-#define JMSG_STR_PARM_MAX  80
-  union {
-    int i[8];
-    char s[JMSG_STR_PARM_MAX];
-  } msg_parm;
-
-  /* Standard state variables for error facility */
-
-  int trace_level;		/* max msg_level that will be displayed */
-
-  /* For recoverable corrupt-data errors, we emit a warning message,
-   * but keep going unless emit_message chooses to abort.  emit_message
-   * should count warnings in num_warnings.  The surrounding application
-   * can check for bad data by seeing if num_warnings is nonzero at the
-   * end of processing.
-   */
-  long num_warnings;		/* number of corrupt-data warnings */
-
-  /* These fields point to the table(s) of error message strings.
-   * An application can change the table pointer to switch to a different
-   * message list (typically, to change the language in which errors are
-   * reported).  Some applications may wish to add additional error codes
-   * that will be handled by the JPEG library error mechanism; the second
-   * table pointer is used for this purpose.
-   *
-   * First table includes all errors generated by JPEG library itself.
-   * Error code 0 is reserved for a "no such error string" message.
-   */
-  const char * const * jpeg_message_table; /* Library errors */
-  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
-  /* Second table can be added by application (see cjpeg/djpeg for example).
-   * It contains strings numbered first_addon_message..last_addon_message.
-   */
-  const char * const * addon_message_table; /* Non-library errors */
-  int first_addon_message;	/* code for first string in addon table */
-  int last_addon_message;	/* code for last string in addon table */
-};
-
-
-/* Progress monitor object */
-
-struct jpeg_progress_mgr {
-  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
-
-  long pass_counter;		/* work units completed in this pass */
-  long pass_limit;		/* total number of work units in this pass */
-  int completed_passes;		/* passes completed so far */
-  int total_passes;		/* total number of passes expected */
-};
-
-
-/* Data destination object for compression */
-
-struct jpeg_destination_mgr {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-
-  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
-  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
-  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
-};
-
-
-/* Data source object for decompression */
-
-struct jpeg_source_mgr {
-  const JOCTET * next_input_byte; /* => next byte to read from buffer */
-  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
-
-  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
-  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
-  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
-  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
-};
-
-
-/* Memory manager object.
- * Allocates "small" objects (a few K total), "large" objects (tens of K),
- * and "really big" objects (virtual arrays with backing store if needed).
- * The memory manager does not allow individual objects to be freed; rather,
- * each created object is assigned to a pool, and whole pools can be freed
- * at once.  This is faster and more convenient than remembering exactly what
- * to free, especially where malloc()/free() are not too speedy.
- * NB: alloc routines never return NULL.  They exit to error_exit if not
- * successful.
- */
-
-#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
-#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
-#define JPOOL_NUMPOOLS	2
-
-typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
-typedef struct jvirt_barray_control * jvirt_barray_ptr;
-
-
-struct jpeg_memory_mgr {
-  /* Method pointers */
-  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
-				size_t sizeofobject));
-  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
-				     size_t sizeofobject));
-  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
-				     JDIMENSION samplesperrow,
-				     JDIMENSION numrows));
-  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
-				      JDIMENSION blocksperrow,
-				      JDIMENSION numrows));
-  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION samplesperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION blocksperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
-  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
-					   jvirt_sarray_ptr ptr,
-					   JDIMENSION start_row,
-					   JDIMENSION num_rows,
-					   boolean writable));
-  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
-					    jvirt_barray_ptr ptr,
-					    JDIMENSION start_row,
-					    JDIMENSION num_rows,
-					    boolean writable));
-  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
-  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
-
-  /* Limit on memory allocation for this JPEG object.  (Note that this is
-   * merely advisory, not a guaranteed maximum; it only affects the space
-   * used for virtual-array buffers.)  May be changed by outer application
-   * after creating the JPEG object.
-   */
-  long max_memory_to_use;
-
-  /* Maximum allocation request accepted by alloc_large. */
-  long max_alloc_chunk;
-};
-
-
-/* Routine signature for application-supplied marker processing methods.
- * Need not pass marker code since it is stored in cinfo->unread_marker.
- */
-typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
-
-
-/* Declarations for routines called by application.
- * The JPP macro hides prototype parameters from compilers that can't cope.
- * Note JPP requires double parentheses.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JPP(arglist)	arglist
-#else
-#define JPP(arglist)	()
-#endif
-
-
-/* Short forms of external names for systems with brain-damaged linkers.
- * We shorten external names to be unique in the first six letters, which
- * is good enough for all known systems.
- * (If your compiler itself needs names to be unique in less than 15 
- * characters, you are out of luck.  Get a better compiler.)
- */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_error		jStdError
-#define jpeg_CreateCompress	jCreaCompress
-#define jpeg_CreateDecompress	jCreaDecompress
-#define jpeg_destroy_compress	jDestCompress
-#define jpeg_destroy_decompress	jDestDecompress
-#define jpeg_stdio_dest		jStdDest
-#define jpeg_stdio_src		jStdSrc
-#define jpeg_mem_dest		jMemDest
-#define jpeg_mem_src		jMemSrc
-#define jpeg_set_defaults	jSetDefaults
-#define jpeg_set_colorspace	jSetColorspace
-#define jpeg_default_colorspace	jDefColorspace
-#define jpeg_set_quality	jSetQuality
-#define jpeg_set_linear_quality	jSetLQuality
-#define jpeg_default_qtables	jDefQTables
-#define jpeg_add_quant_table	jAddQuantTable
-#define jpeg_quality_scaling	jQualityScaling
-#define jpeg_simple_progression	jSimProgress
-#define jpeg_suppress_tables	jSuppressTables
-#define jpeg_alloc_quant_table	jAlcQTable
-#define jpeg_alloc_huff_table	jAlcHTable
-#define jpeg_std_huff_table	jStdHTable
-#define jpeg_start_compress	jStrtCompress
-#define jpeg_write_scanlines	jWrtScanlines
-#define jpeg_finish_compress	jFinCompress
-#define jpeg_calc_jpeg_dimensions	jCjpegDimensions
-#define jpeg_write_raw_data	jWrtRawData
-#define jpeg_write_marker	jWrtMarker
-#define jpeg_write_m_header	jWrtMHeader
-#define jpeg_write_m_byte	jWrtMByte
-#define jpeg_write_tables	jWrtTables
-#define jpeg_read_header	jReadHeader
-#define jpeg_start_decompress	jStrtDecompress
-#define jpeg_read_scanlines	jReadScanlines
-#define jpeg_finish_decompress	jFinDecompress
-#define jpeg_read_raw_data	jReadRawData
-#define jpeg_has_multiple_scans	jHasMultScn
-#define jpeg_start_output	jStrtOutput
-#define jpeg_finish_output	jFinOutput
-#define jpeg_input_complete	jInComplete
-#define jpeg_new_colormap	jNewCMap
-#define jpeg_consume_input	jConsumeInput
-#define jpeg_core_output_dimensions	jCoreDimensions
-#define jpeg_calc_output_dimensions	jCalcDimensions
-#define jpeg_save_markers	jSaveMarkers
-#define jpeg_set_marker_processor	jSetMarker
-#define jpeg_read_coefficients	jReadCoefs
-#define jpeg_write_coefficients	jWrtCoefs
-#define jpeg_copy_critical_parameters	jCopyCrit
-#define jpeg_abort_compress	jAbrtCompress
-#define jpeg_abort_decompress	jAbrtDecompress
-#define jpeg_abort		jAbort
-#define jpeg_destroy		jDestroy
-#define jpeg_resync_to_restart	jResyncRestart
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/* Default error-management setup */
-EXTERN(struct jpeg_error_mgr *) jpeg_std_error
-	JPP((struct jpeg_error_mgr * err));
-
-/* Initialization of JPEG compression objects.
- * jpeg_create_compress() and jpeg_create_decompress() are the exported
- * names that applications should call.  These expand to calls on
- * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
- * passed for version mismatch checking.
- * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
- */
-#define jpeg_create_compress(cinfo) \
-    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
-			(size_t) sizeof(struct jpeg_compress_struct))
-#define jpeg_create_decompress(cinfo) \
-    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
-			  (size_t) sizeof(struct jpeg_decompress_struct))
-EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
-				      int version, size_t structsize));
-EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
-					int version, size_t structsize));
-/* Destruction of JPEG compression objects */
-EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
-
-/* Standard data source and destination managers: stdio streams. */
-/* Caller is responsible for opening the file before and closing after. */
-EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
-EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
-
-/* Data source and destination managers: memory buffers. */
-EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo,
-			       unsigned char ** outbuffer,
-			       size_t * outsize));
-EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo,
-			      const unsigned char * inbuffer,
-			      size_t insize));
-
-/* Default parameter setup for compression */
-EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
-/* Compression parameter setup aids */
-EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
-				      J_COLOR_SPACE colorspace));
-EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
-				   boolean force_baseline));
-EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
-					  int scale_factor,
-					  boolean force_baseline));
-EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo,
-				       boolean force_baseline));
-EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
-				       const unsigned int *basic_table,
-				       int scale_factor,
-				       boolean force_baseline));
-EXTERN(int) jpeg_quality_scaling JPP((int quality));
-EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
-				       boolean suppress));
-EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
-EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
-EXTERN(JHUFF_TBL *) jpeg_std_huff_table JPP((j_common_ptr cinfo,
-					     boolean isDC, int tblno));
-
-/* Main entry points for compression */
-EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
-				      boolean write_all_tables));
-EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
-					     JSAMPARRAY scanlines,
-					     JDIMENSION num_lines));
-EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
-
-/* Precalculate JPEG dimensions for current compression parameters. */
-EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo));
-
-/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
-					    JSAMPIMAGE data,
-					    JDIMENSION num_lines));
-
-/* Write a special marker.  See libjpeg.txt concerning safe usage. */
-EXTERN(void) jpeg_write_marker
-	JPP((j_compress_ptr cinfo, int marker,
-	     const JOCTET * dataptr, unsigned int datalen));
-/* Same, but piecemeal. */
-EXTERN(void) jpeg_write_m_header
-	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
-EXTERN(void) jpeg_write_m_byte
-	JPP((j_compress_ptr cinfo, int val));
-
-/* Alternate compression function: just write an abbreviated table file */
-EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
-
-/* Decompression startup: read start of JPEG datastream to see what's there */
-EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
-				  boolean require_image));
-/* Return value is one of: */
-#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
-#define JPEG_HEADER_OK		1 /* Found valid image datastream */
-#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
-/* If you pass require_image = TRUE (normal case), you need not check for
- * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
- * JPEG_SUSPENDED is only possible if you use a data source module that can
- * give a suspension return (the stdio source module doesn't).
- */
-
-/* Main entry points for decompression */
-EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
-					    JSAMPARRAY scanlines,
-					    JDIMENSION max_lines));
-EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
-
-/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
-					   JSAMPIMAGE data,
-					   JDIMENSION max_lines));
-
-/* Additional entry points for buffered-image mode. */
-EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
-				       int scan_number));
-EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
-EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
-/* Return value is one of: */
-/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
-#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
-#define JPEG_REACHED_EOI	2 /* Reached end of image */
-#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
-#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
-
-/* Precalculate output dimensions for current decompression parameters. */
-EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
-
-/* Control saving of COM and APPn markers into marker_list. */
-EXTERN(void) jpeg_save_markers
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     unsigned int length_limit));
-
-/* Install a special processing method for COM or APPn markers. */
-EXTERN(void) jpeg_set_marker_processor
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     jpeg_marker_parser_method routine));
-
-/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
-EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
-					  jvirt_barray_ptr * coef_arrays));
-EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
-						j_compress_ptr dstinfo));
-
-/* If you choose to abort compression or decompression before completing
- * jpeg_finish_(de)compress, then you need to clean up to release memory,
- * temporary files, etc.  You can just call jpeg_destroy_(de)compress
- * if you're done with the JPEG object, but if you want to clean it up and
- * reuse it, call this:
- */
-EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
-
-/* Generic versions of jpeg_abort and jpeg_destroy that work on either
- * flavor of JPEG object.  These may be more convenient in some places.
- */
-EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
-
-/* Default restart-marker-resync procedure for use by data source modules */
-EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
-					    int desired));
-
-
-/* These marker codes are exported since applications and data source modules
- * are likely to want to use them.
- */
-
-#define JPEG_RST0	0xD0	/* RST0 marker code */
-#define JPEG_EOI	0xD9	/* EOI marker code */
-#define JPEG_APP0	0xE0	/* APP0 marker code */
-#define JPEG_COM	0xFE	/* COM marker code */
-
-
-/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
- * for structure definitions that are never filled in, keep it quiet by
- * supplying dummy definitions for the various substructures.
- */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-struct jpeg_comp_master { long dummy; };
-struct jpeg_c_main_controller { long dummy; };
-struct jpeg_c_prep_controller { long dummy; };
-struct jpeg_c_coef_controller { long dummy; };
-struct jpeg_marker_writer { long dummy; };
-struct jpeg_color_converter { long dummy; };
-struct jpeg_downsampler { long dummy; };
-struct jpeg_forward_dct { long dummy; };
-struct jpeg_entropy_encoder { long dummy; };
-struct jpeg_decomp_master { long dummy; };
-struct jpeg_d_main_controller { long dummy; };
-struct jpeg_d_coef_controller { long dummy; };
-struct jpeg_d_post_controller { long dummy; };
-struct jpeg_input_controller { long dummy; };
-struct jpeg_marker_reader { long dummy; };
-struct jpeg_entropy_decoder { long dummy; };
-struct jpeg_inverse_dct { long dummy; };
-struct jpeg_upsampler { long dummy; };
-struct jpeg_color_deconverter { long dummy; };
-struct jpeg_color_quantizer { long dummy; };
-#endif /* JPEG_INTERNALS */
-#endif /* INCOMPLETE_TYPES_BROKEN */
-
-
-/*
- * The JPEG library modules define JPEG_INTERNALS before including this file.
- * The internal structure declarations are read only when that is true.
- * Applications using the library should not include jpegint.h, but may wish
- * to include jerror.h.
- */
-
-#ifdef JPEG_INTERNALS
-#include "jpegint.h"		/* fetch private declarations */
-#include "jerror.h"		/* fetch error codes too */
-#endif
-
-#ifdef __cplusplus
-#ifndef DONT_USE_EXTERN_C
-}
-#endif
-#endif
-
-#endif /* JPEGLIB_H */
diff --git a/cmake/externals/libjpeg/jquant1.c b/cmake/externals/libjpeg/jquant1.c
deleted file mode 100644
index 60b1843e7..000000000
--- a/cmake/externals/libjpeg/jquant1.c
+++ /dev/null
@@ -1,851 +0,0 @@
-/*
- * jquant1.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains 1-pass color quantization (color mapping) routines.
- * These routines provide mapping to a fixed color map using equally spaced
- * color values.  Optional Floyd-Steinberg or ordered dithering is available.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef QUANT_1PASS_SUPPORTED
-
-
-/*
- * The main purpose of 1-pass quantization is to provide a fast, if not very
- * high quality, colormapped output capability.  A 2-pass quantizer usually
- * gives better visual quality; however, for quantized grayscale output this
- * quantizer is perfectly adequate.  Dithering is highly recommended with this
- * quantizer, though you can turn it off if you really want to.
- *
- * In 1-pass quantization the colormap must be chosen in advance of seeing the
- * image.  We use a map consisting of all combinations of Ncolors[i] color
- * values for the i'th component.  The Ncolors[] values are chosen so that
- * their product, the total number of colors, is no more than that requested.
- * (In most cases, the product will be somewhat less.)
- *
- * Since the colormap is orthogonal, the representative value for each color
- * component can be determined without considering the other components;
- * then these indexes can be combined into a colormap index by a standard
- * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
- * can be precalculated and stored in the lookup table colorindex[].
- * colorindex[i][j] maps pixel value j in component i to the nearest
- * representative value (grid plane) for that component; this index is
- * multiplied by the array stride for component i, so that the
- * index of the colormap entry closest to a given pixel value is just
- *    sum( colorindex[component-number][pixel-component-value] )
- * Aside from being fast, this scheme allows for variable spacing between
- * representative values with no additional lookup cost.
- *
- * If gamma correction has been applied in color conversion, it might be wise
- * to adjust the color grid spacing so that the representative colors are
- * equidistant in linear space.  At this writing, gamma correction is not
- * implemented by jdcolor, so nothing is done here.
- */
-
-
-/* Declarations for ordered dithering.
- *
- * We use a standard 16x16 ordered dither array.  The basic concept of ordered
- * dithering is described in many references, for instance Dale Schumacher's
- * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
- * In place of Schumacher's comparisons against a "threshold" value, we add a
- * "dither" value to the input pixel and then round the result to the nearest
- * output value.  The dither value is equivalent to (0.5 - threshold) times
- * the distance between output values.  For ordered dithering, we assume that
- * the output colors are equally spaced; if not, results will probably be
- * worse, since the dither may be too much or too little at a given point.
- *
- * The normal calculation would be to form pixel value + dither, range-limit
- * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
- * We can skip the separate range-limiting step by extending the colorindex
- * table in both directions.
- */
-
-#define ODITHER_SIZE  16	/* dimension of dither matrix */
-/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
-#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
-#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
-
-typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
-typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
-
-static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
-  /* Bayer's order-4 dither array.  Generated by the code given in
-   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
-   * The values in this array must range from 0 to ODITHER_CELLS-1.
-   */
-  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
-  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
-  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
-  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
-  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
-  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
-  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
-  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
-  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
-  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
-  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
-  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
-  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
-  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
-  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
-  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
-};
-
-
-/* Declarations for Floyd-Steinberg dithering.
- *
- * Errors are accumulated into the array fserrors[], at a resolution of
- * 1/16th of a pixel count.  The error at a given pixel is propagated
- * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
- * We work left-to-right on even rows, right-to-left on odd rows.
- *
- * We can get away with a single array (holding one row's worth of errors)
- * by using it to store the current row's errors at pixel columns not yet
- * processed, but the next row's errors at columns already processed.  We
- * need only a few extra variables to hold the errors immediately around the
- * current column.  (If we are lucky, those variables are in registers, but
- * even if not, they're probably cheaper to access than array elements are.)
- *
- * The fserrors[] array is indexed [component#][position].
- * We provide (#columns + 2) entries per component; the extra entry at each
- * end saves us from special-casing the first and last pixels.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
-#else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
-#endif
-
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
-
-
-/* Private subobject */
-
-#define MAX_Q_COMPS 4		/* max components I can handle */
-
-typedef struct {
-  struct jpeg_color_quantizer pub; /* public fields */
-
-  /* Initially allocated colormap is saved here */
-  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
-  int sv_actual;		/* number of entries in use */
-
-  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
-  /* colorindex[i][j] = index of color closest to pixel value j in component i,
-   * premultiplied as described above.  Since colormap indexes must fit into
-   * JSAMPLEs, the entries of this array will too.
-   */
-  boolean is_padded;		/* is the colorindex padded for odither? */
-
-  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
-
-  /* Variables for ordered dithering */
-  int row_index;		/* cur row's vertical index in dither matrix */
-  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
-
-  /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-} my_cquantizer;
-
-typedef my_cquantizer * my_cquantize_ptr;
-
-
-/*
- * Policy-making subroutines for create_colormap and create_colorindex.
- * These routines determine the colormap to be used.  The rest of the module
- * only assumes that the colormap is orthogonal.
- *
- *  * select_ncolors decides how to divvy up the available colors
- *    among the components.
- *  * output_value defines the set of representative values for a component.
- *  * largest_input_value defines the mapping from input values to
- *    representative values for a component.
- * Note that the latter two routines may impose different policies for
- * different components, though this is not currently done.
- */
-
-
-LOCAL(int)
-select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
-/* Determine allocation of desired colors to components, */
-/* and fill in Ncolors[] array to indicate choice. */
-/* Return value is total number of colors (product of Ncolors[] values). */
-{
-  int nc = cinfo->out_color_components; /* number of color components */
-  int max_colors = cinfo->desired_number_of_colors;
-  int total_colors, iroot, i, j;
-  boolean changed;
-  long temp;
-  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
-
-  /* We can allocate at least the nc'th root of max_colors per component. */
-  /* Compute floor(nc'th root of max_colors). */
-  iroot = 1;
-  do {
-    iroot++;
-    temp = iroot;		/* set temp = iroot ** nc */
-    for (i = 1; i < nc; i++)
-      temp *= iroot;
-  } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
-  iroot--;			/* now iroot = floor(root) */
-
-  /* Must have at least 2 color values per component */
-  if (iroot < 2)
-    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) temp);
-
-  /* Initialize to iroot color values for each component */
-  total_colors = 1;
-  for (i = 0; i < nc; i++) {
-    Ncolors[i] = iroot;
-    total_colors *= iroot;
-  }
-  /* We may be able to increment the count for one or more components without
-   * exceeding max_colors, though we know not all can be incremented.
-   * Sometimes, the first component can be incremented more than once!
-   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
-   * In RGB colorspace, try to increment G first, then R, then B.
-   */
-  do {
-    changed = FALSE;
-    for (i = 0; i < nc; i++) {
-      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
-      /* calculate new total_colors if Ncolors[j] is incremented */
-      temp = total_colors / Ncolors[j];
-      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
-      if (temp > (long) max_colors)
-	break;			/* won't fit, done with this pass */
-      Ncolors[j]++;		/* OK, apply the increment */
-      total_colors = (int) temp;
-      changed = TRUE;
-    }
-  } while (changed);
-
-  return total_colors;
-}
-
-
-LOCAL(int)
-output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
-/* Return j'th output value, where j will range from 0 to maxj */
-/* The output values must fall in 0..MAXJSAMPLE in increasing order */
-{
-  /* We always provide values 0 and MAXJSAMPLE for each component;
-   * any additional values are equally spaced between these limits.
-   * (Forcing the upper and lower values to the limits ensures that
-   * dithering can't produce a color outside the selected gamut.)
-   */
-  return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj);
-}
-
-
-LOCAL(int)
-largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
-/* Return largest input value that should map to j'th output value */
-/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
-{
-  /* Breakpoints are halfway between values returned by output_value */
-  return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
-}
-
-
-/*
- * Create the colormap.
- */
-
-LOCAL(void)
-create_colormap (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colormap;		/* Created colormap */
-  int total_colors;		/* Number of distinct output colors */
-  int i,j,k, nci, blksize, blkdist, ptr, val;
-
-  /* Select number of colors for each component */
-  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
-
-  /* Report selected color counts */
-  if (cinfo->out_color_components == 3)
-    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
-	     total_colors, cquantize->Ncolors[0],
-	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
-  else
-    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
-
-  /* Allocate and fill in the colormap. */
-  /* The colors are ordered in the map in standard row-major order, */
-  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
-
-  colormap = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (JDIMENSION) total_colors, (JDIMENSION) cinfo->out_color_components);
-
-  /* blksize is number of adjacent repeated entries for a component */
-  /* blkdist is distance between groups of identical entries for a component */
-  blkdist = total_colors;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    /* fill in colormap entries for i'th color component */
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    blksize = blkdist / nci;
-    for (j = 0; j < nci; j++) {
-      /* Compute j'th output value (out of nci) for component */
-      val = output_value(cinfo, i, j, nci-1);
-      /* Fill in all colormap entries that have this value of this component */
-      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
-	/* fill in blksize entries beginning at ptr */
-	for (k = 0; k < blksize; k++)
-	  colormap[i][ptr+k] = (JSAMPLE) val;
-      }
-    }
-    blkdist = blksize;		/* blksize of this color is blkdist of next */
-  }
-
-  /* Save the colormap in private storage,
-   * where it will survive color quantization mode changes.
-   */
-  cquantize->sv_colormap = colormap;
-  cquantize->sv_actual = total_colors;
-}
-
-
-/*
- * Create the color index table.
- */
-
-LOCAL(void)
-create_colorindex (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPROW indexptr;
-  int i,j,k, nci, blksize, val, pad;
-
-  /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
-   * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
-   * This is not necessary in the other dithering modes.  However, we
-   * flag whether it was done in case user changes dithering mode.
-   */
-  if (cinfo->dither_mode == JDITHER_ORDERED) {
-    pad = MAXJSAMPLE*2;
-    cquantize->is_padded = TRUE;
-  } else {
-    pad = 0;
-    cquantize->is_padded = FALSE;
-  }
-
-  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE,
-     (JDIMENSION) (MAXJSAMPLE+1 + pad),
-     (JDIMENSION) cinfo->out_color_components);
-
-  /* blksize is number of adjacent repeated entries for a component */
-  blksize = cquantize->sv_actual;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    /* fill in colorindex entries for i'th color component */
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    blksize = blksize / nci;
-
-    /* adjust colorindex pointers to provide padding at negative indexes. */
-    if (pad)
-      cquantize->colorindex[i] += MAXJSAMPLE;
-
-    /* in loop, val = index of current output value, */
-    /* and k = largest j that maps to current val */
-    indexptr = cquantize->colorindex[i];
-    val = 0;
-    k = largest_input_value(cinfo, i, 0, nci-1);
-    for (j = 0; j <= MAXJSAMPLE; j++) {
-      while (j > k)		/* advance val if past boundary */
-	k = largest_input_value(cinfo, i, ++val, nci-1);
-      /* premultiply so that no multiplication needed in main processing */
-      indexptr[j] = (JSAMPLE) (val * blksize);
-    }
-    /* Pad at both ends if necessary */
-    if (pad)
-      for (j = 1; j <= MAXJSAMPLE; j++) {
-	indexptr[-j] = indexptr[0];
-	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
-      }
-  }
-}
-
-
-/*
- * Create an ordered-dither array for a component having ncolors
- * distinct output values.
- */
-
-LOCAL(ODITHER_MATRIX_PTR)
-make_odither_array (j_decompress_ptr cinfo, int ncolors)
-{
-  ODITHER_MATRIX_PTR odither;
-  int j,k;
-  INT32 num,den;
-
-  odither = (ODITHER_MATRIX_PTR) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(ODITHER_MATRIX));
-  /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
-   * Hence the dither value for the matrix cell with fill order f
-   * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
-   * On 16-bit-int machine, be careful to avoid overflow.
-   */
-  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
-  for (j = 0; j < ODITHER_SIZE; j++) {
-    for (k = 0; k < ODITHER_SIZE; k++) {
-      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
-	    * MAXJSAMPLE;
-      /* Ensure round towards zero despite C's lack of consistency
-       * about rounding negative values in integer division...
-       */
-      odither[j][k] = (int) (num<0 ? -((-num)/den) : num/den);
-    }
-  }
-  return odither;
-}
-
-
-/*
- * Create the ordered-dither tables.
- * Components having the same number of representative colors may 
- * share a dither table.
- */
-
-LOCAL(void)
-create_odither_tables (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  ODITHER_MATRIX_PTR odither;
-  int i, j, nci;
-
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    odither = NULL;		/* search for matching prior component */
-    for (j = 0; j < i; j++) {
-      if (nci == cquantize->Ncolors[j]) {
-	odither = cquantize->odither[j];
-	break;
-      }
-    }
-    if (odither == NULL)	/* need a new table? */
-      odither = make_odither_array(cinfo, nci);
-    cquantize->odither[i] = odither;
-  }
-}
-
-
-/*
- * Map some rows of pixels to the output colormapped representation.
- */
-
-METHODDEF(void)
-color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		JSAMPARRAY output_buf, int num_rows)
-/* General case, no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colorindex = cquantize->colorindex;
-  register int pixcode, ci;
-  register JSAMPROW ptrin, ptrout;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  register int nc = cinfo->out_color_components;
-
-  for (row = 0; row < num_rows; row++) {
-    ptrin = input_buf[row];
-    ptrout = output_buf[row];
-    for (col = width; col > 0; col--) {
-      pixcode = 0;
-      for (ci = 0; ci < nc; ci++) {
-	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
-      }
-      *ptrout++ = (JSAMPLE) pixcode;
-    }
-  }
-}
-
-
-METHODDEF(void)
-color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		 JSAMPARRAY output_buf, int num_rows)
-/* Fast path for out_color_components==3, no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register int pixcode;
-  register JSAMPROW ptrin, ptrout;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    ptrin = input_buf[row];
-    ptrout = output_buf[row];
-    for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*ptrin++)]);
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*ptrin++)]);
-      *ptrout++ = (JSAMPLE) pixcode;
-    }
-  }
-}
-
-
-METHODDEF(void)
-quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		     JSAMPARRAY output_buf, int num_rows)
-/* General case, with ordered dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
-  int * dither;			/* points to active row of dither matrix */
-  int row_index, col_index;	/* current indexes into dither matrix */
-  int nc = cinfo->out_color_components;
-  int ci;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    /* Initialize output values to 0 so can process components separately */
-    FMEMZERO((void FAR *) output_buf[row], (size_t) width * SIZEOF(JSAMPLE));
-    row_index = cquantize->row_index;
-    for (ci = 0; ci < nc; ci++) {
-      input_ptr = input_buf[row] + ci;
-      output_ptr = output_buf[row];
-      colorindex_ci = cquantize->colorindex[ci];
-      dither = cquantize->odither[ci][row_index];
-      col_index = 0;
-
-      for (col = width; col > 0; col--) {
-	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
-	 * select output value, accumulate into output code for this pixel.
-	 * Range-limiting need not be done explicitly, as we have extended
-	 * the colorindex table to produce the right answers for out-of-range
-	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
-	 * required amount of padding.
-	 */
-	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
-	input_ptr += nc;
-	output_ptr++;
-	col_index = (col_index + 1) & ODITHER_MASK;
-      }
-    }
-    /* Advance row index for next row */
-    row_index = (row_index + 1) & ODITHER_MASK;
-    cquantize->row_index = row_index;
-  }
-}
-
-
-METHODDEF(void)
-quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		      JSAMPARRAY output_buf, int num_rows)
-/* Fast path for out_color_components==3, with ordered dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register int pixcode;
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex0 = cquantize->colorindex[0];
-  JSAMPROW colorindex1 = cquantize->colorindex[1];
-  JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int * dither0;		/* points to active row of dither matrix */
-  int * dither1;
-  int * dither2;
-  int row_index, col_index;	/* current indexes into dither matrix */
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    row_index = cquantize->row_index;
-    input_ptr = input_buf[row];
-    output_ptr = output_buf[row];
-    dither0 = cquantize->odither[0][row_index];
-    dither1 = cquantize->odither[1][row_index];
-    dither2 = cquantize->odither[2][row_index];
-    col_index = 0;
-
-    for (col = width; col > 0; col--) {
-      pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
-					dither0[col_index]]);
-      pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
-					dither1[col_index]]);
-      pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
-					dither2[col_index]]);
-      *output_ptr++ = (JSAMPLE) pixcode;
-      col_index = (col_index + 1) & ODITHER_MASK;
-    }
-    row_index = (row_index + 1) & ODITHER_MASK;
-    cquantize->row_index = row_index;
-  }
-}
-
-
-METHODDEF(void)
-quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		    JSAMPARRAY output_buf, int num_rows)
-/* General case, with Floyd-Steinberg dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register LOCFSERROR cur;	/* current error or pixel value */
-  LOCFSERROR belowerr;		/* error for pixel below cur */
-  LOCFSERROR bpreverr;		/* error for below/prev col */
-  LOCFSERROR bnexterr;		/* error for below/next col */
-  LOCFSERROR delta;
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  register JSAMPROW input_ptr;
-  register JSAMPROW output_ptr;
-  JSAMPROW colorindex_ci;
-  JSAMPROW colormap_ci;
-  int pixcode;
-  int nc = cinfo->out_color_components;
-  int dir;			/* 1 for left-to-right, -1 for right-to-left */
-  int dirnc;			/* dir * nc */
-  int ci;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
-  SHIFT_TEMPS
-
-  for (row = 0; row < num_rows; row++) {
-    /* Initialize output values to 0 so can process components separately */
-    FMEMZERO((void FAR *) output_buf[row], (size_t) width * SIZEOF(JSAMPLE));
-    for (ci = 0; ci < nc; ci++) {
-      input_ptr = input_buf[row] + ci;
-      output_ptr = output_buf[row];
-      if (cquantize->on_odd_row) {
-	/* work right to left in this row */
-	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
-	output_ptr += width-1;
-	dir = -1;
-	dirnc = -nc;
-	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
-      } else {
-	/* work left to right in this row */
-	dir = 1;
-	dirnc = nc;
-	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
-      }
-      colorindex_ci = cquantize->colorindex[ci];
-      colormap_ci = cquantize->sv_colormap[ci];
-      /* Preset error values: no error propagated to first pixel from left */
-      cur = 0;
-      /* and no error propagated to row below yet */
-      belowerr = bpreverr = 0;
-
-      for (col = width; col > 0; col--) {
-	/* cur holds the error propagated from the previous pixel on the
-	 * current line.  Add the error propagated from the previous line
-	 * to form the complete error correction term for this pixel, and
-	 * round the error term (which is expressed * 16) to an integer.
-	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-	 * for either sign of the error value.
-	 * Note: errorptr points to *previous* column's array entry.
-	 */
-	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
-	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-	 * The maximum error is +- MAXJSAMPLE; this sets the required size
-	 * of the range_limit array.
-	 */
-	cur += GETJSAMPLE(*input_ptr);
-	cur = GETJSAMPLE(range_limit[cur]);
-	/* Select output value, accumulate into output code for this pixel */
-	pixcode = GETJSAMPLE(colorindex_ci[cur]);
-	*output_ptr += (JSAMPLE) pixcode;
-	/* Compute actual representation error at this pixel */
-	/* Note: we can do this even though we don't have the final */
-	/* pixel code, because the colormap is orthogonal. */
-	cur -= GETJSAMPLE(colormap_ci[pixcode]);
-	/* Compute error fractions to be propagated to adjacent pixels.
-	 * Add these into the running sums, and simultaneously shift the
-	 * next-line error sums left by 1 column.
-	 */
-	bnexterr = cur;
-	delta = cur * 2;
-	cur += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr + cur);
-	cur += delta;		/* form error * 5 */
-	bpreverr = belowerr + cur;
-	belowerr = bnexterr;
-	cur += delta;		/* form error * 7 */
-	/* At this point cur contains the 7/16 error value to be propagated
-	 * to the next pixel on the current line, and all the errors for the
-	 * next line have been shifted over. We are therefore ready to move on.
-	 */
-	input_ptr += dirnc;	/* advance input ptr to next column */
-	output_ptr += dir;	/* advance output ptr to next column */
-	errorptr += dir;	/* advance errorptr to current column */
-      }
-      /* Post-loop cleanup: we must unload the final error value into the
-       * final fserrors[] entry.  Note we need not unload belowerr because
-       * it is for the dummy column before or after the actual array.
-       */
-      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
-    }
-    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
-  }
-}
-
-
-/*
- * Allocate workspace for Floyd-Steinberg errors.
- */
-
-LOCAL(void)
-alloc_fs_workspace (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  size_t arraysize;
-  int i;
-
-  arraysize = ((size_t) cinfo->output_width + (size_t) 2) * SIZEOF(FSERROR);
-  for (i = 0; i < cinfo->out_color_components; i++) {
-    cquantize->fserrors[i] = (FSERRPTR) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
-  }
-}
-
-
-/*
- * Initialize for one-pass color quantization.
- */
-
-METHODDEF(void)
-start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  size_t arraysize;
-  int i;
-
-  /* Install my colormap. */
-  cinfo->colormap = cquantize->sv_colormap;
-  cinfo->actual_number_of_colors = cquantize->sv_actual;
-
-  /* Initialize for desired dithering mode. */
-  switch (cinfo->dither_mode) {
-  case JDITHER_NONE:
-    if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = color_quantize3;
-    else
-      cquantize->pub.color_quantize = color_quantize;
-    break;
-  case JDITHER_ORDERED:
-    if (cinfo->out_color_components == 3)
-      cquantize->pub.color_quantize = quantize3_ord_dither;
-    else
-      cquantize->pub.color_quantize = quantize_ord_dither;
-    cquantize->row_index = 0;	/* initialize state for ordered dither */
-    /* If user changed to ordered dither from another mode,
-     * we must recreate the color index table with padding.
-     * This will cost extra space, but probably isn't very likely.
-     */
-    if (! cquantize->is_padded)
-      create_colorindex(cinfo);
-    /* Create ordered-dither tables if we didn't already. */
-    if (cquantize->odither[0] == NULL)
-      create_odither_tables(cinfo);
-    break;
-  case JDITHER_FS:
-    cquantize->pub.color_quantize = quantize_fs_dither;
-    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
-    /* Allocate Floyd-Steinberg workspace if didn't already. */
-    if (cquantize->fserrors[0] == NULL)
-      alloc_fs_workspace(cinfo);
-    /* Initialize the propagated errors to zero. */
-    arraysize = ((size_t) cinfo->output_width + (size_t) 2) * SIZEOF(FSERROR);
-    for (i = 0; i < cinfo->out_color_components; i++)
-      FMEMZERO((void FAR *) cquantize->fserrors[i], arraysize);
-    break;
-  default:
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
-  }
-}
-
-
-/*
- * Finish up at the end of the pass.
- */
-
-METHODDEF(void)
-finish_pass_1_quant (j_decompress_ptr cinfo)
-{
-  /* no work in 1-pass case */
-}
-
-
-/*
- * Switch to a new external colormap between output passes.
- * Shouldn't get to this module!
- */
-
-METHODDEF(void)
-new_color_map_1_quant (j_decompress_ptr cinfo)
-{
-  ERREXIT(cinfo, JERR_MODE_CHANGE);
-}
-
-
-/*
- * Module initialization routine for 1-pass color quantization.
- */
-
-GLOBAL(void)
-jinit_1pass_quantizer (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize;
-
-  cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_cquantizer));
-  cinfo->cquantize = &cquantize->pub;
-  cquantize->pub.start_pass = start_pass_1_quant;
-  cquantize->pub.finish_pass = finish_pass_1_quant;
-  cquantize->pub.new_color_map = new_color_map_1_quant;
-  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
-  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
-
-  /* Make sure my internal arrays won't overflow */
-  if (cinfo->out_color_components > MAX_Q_COMPS)
-    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
-  /* Make sure colormap indexes can be represented by JSAMPLEs */
-  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
-    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
-
-  /* Create the colormap and color index table. */
-  create_colormap(cinfo);
-  create_colorindex(cinfo);
-
-  /* Allocate Floyd-Steinberg workspace now if requested.
-   * We do this now since it is FAR storage and may affect the memory
-   * manager's space calculations.  If the user changes to FS dither
-   * mode in a later pass, we will allocate the space then, and will
-   * possibly overrun the max_memory_to_use setting.
-   */
-  if (cinfo->dither_mode == JDITHER_FS)
-    alloc_fs_workspace(cinfo);
-}
-
-#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jquant2.c b/cmake/externals/libjpeg/jquant2.c
deleted file mode 100644
index 662b9bcef..000000000
--- a/cmake/externals/libjpeg/jquant2.c
+++ /dev/null
@@ -1,1311 +0,0 @@
-/*
- * jquant2.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains 2-pass color quantization (color mapping) routines.
- * These routines provide selection of a custom color map for an image,
- * followed by mapping of the image to that color map, with optional
- * Floyd-Steinberg dithering.
- * It is also possible to use just the second pass to map to an arbitrary
- * externally-given color map.
- *
- * Note: ordered dithering is not supported, since there isn't any fast
- * way to compute intercolor distances; it's unclear that ordered dither's
- * fundamental assumptions even hold with an irregularly spaced color map.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-#ifdef QUANT_2PASS_SUPPORTED
-
-
-/*
- * This module implements the well-known Heckbert paradigm for color
- * quantization.  Most of the ideas used here can be traced back to
- * Heckbert's seminal paper
- *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
- *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
- *
- * In the first pass over the image, we accumulate a histogram showing the
- * usage count of each possible color.  To keep the histogram to a reasonable
- * size, we reduce the precision of the input; typical practice is to retain
- * 5 or 6 bits per color, so that 8 or 4 different input values are counted
- * in the same histogram cell.
- *
- * Next, the color-selection step begins with a box representing the whole
- * color space, and repeatedly splits the "largest" remaining box until we
- * have as many boxes as desired colors.  Then the mean color in each
- * remaining box becomes one of the possible output colors.
- * 
- * The second pass over the image maps each input pixel to the closest output
- * color (optionally after applying a Floyd-Steinberg dithering correction).
- * This mapping is logically trivial, but making it go fast enough requires
- * considerable care.
- *
- * Heckbert-style quantizers vary a good deal in their policies for choosing
- * the "largest" box and deciding where to cut it.  The particular policies
- * used here have proved out well in experimental comparisons, but better ones
- * may yet be found.
- *
- * In earlier versions of the IJG code, this module quantized in YCbCr color
- * space, processing the raw upsampled data without a color conversion step.
- * This allowed the color conversion math to be done only once per colormap
- * entry, not once per pixel.  However, that optimization precluded other
- * useful optimizations (such as merging color conversion with upsampling)
- * and it also interfered with desired capabilities such as quantizing to an
- * externally-supplied colormap.  We have therefore abandoned that approach.
- * The present code works in the post-conversion color space, typically RGB.
- *
- * To improve the visual quality of the results, we actually work in scaled
- * RGB space, giving G distances more weight than R, and R in turn more than
- * B.  To do everything in integer math, we must use integer scale factors.
- * The 2/3/1 scale factors used here correspond loosely to the relative
- * weights of the colors in the NTSC grayscale equation.
- * If you want to use this code to quantize a non-RGB color space, you'll
- * probably need to change these scale factors.
- */
-
-#define R_SCALE 2		/* scale R distances by this much */
-#define G_SCALE 3		/* scale G distances by this much */
-#define B_SCALE 1		/* and B by this much */
-
-/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
- * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
- * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
- * you'll get compile errors until you extend this logic.  In that case
- * you'll probably want to tweak the histogram sizes too.
- */
-
-#if RGB_RED == 0
-#define C0_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 0
-#define C0_SCALE B_SCALE
-#endif
-#if RGB_GREEN == 1
-#define C1_SCALE G_SCALE
-#endif
-#if RGB_RED == 2
-#define C2_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 2
-#define C2_SCALE B_SCALE
-#endif
-
-
-/*
- * First we have the histogram data structure and routines for creating it.
- *
- * The number of bits of precision can be adjusted by changing these symbols.
- * We recommend keeping 6 bits for G and 5 each for R and B.
- * If you have plenty of memory and cycles, 6 bits all around gives marginally
- * better results; if you are short of memory, 5 bits all around will save
- * some space but degrade the results.
- * To maintain a fully accurate histogram, we'd need to allocate a "long"
- * (preferably unsigned long) for each cell.  In practice this is overkill;
- * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
- * and clamping those that do overflow to the maximum value will give close-
- * enough results.  This reduces the recommended histogram size from 256Kb
- * to 128Kb, which is a useful savings on PC-class machines.
- * (In the second pass the histogram space is re-used for pixel mapping data;
- * in that capacity, each cell must be able to store zero to the number of
- * desired colors.  16 bits/cell is plenty for that too.)
- * Since the JPEG code is intended to run in small memory model on 80x86
- * machines, we can't just allocate the histogram in one chunk.  Instead
- * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
- * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
- * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
- * on 80x86 machines, the pointer row is in near memory but the actual
- * arrays are in far memory (same arrangement as we use for image arrays).
- */
-
-#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
-
-/* These will do the right thing for either R,G,B or B,G,R color order,
- * but you may not like the results for other color orders.
- */
-#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
-#define HIST_C1_BITS  6		/* bits of precision in G histogram */
-#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
-
-/* Number of elements along histogram axes. */
-#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
-#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
-#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
-
-/* These are the amounts to shift an input value to get a histogram index. */
-#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
-#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
-#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
-
-
-typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
-
-typedef histcell FAR * histptr;	/* for pointers to histogram cells */
-
-typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
-typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
-typedef hist2d * hist3d;	/* type for top-level pointer */
-
-
-/* Declarations for Floyd-Steinberg dithering.
- *
- * Errors are accumulated into the array fserrors[], at a resolution of
- * 1/16th of a pixel count.  The error at a given pixel is propagated
- * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
- * We work left-to-right on even rows, right-to-left on odd rows.
- *
- * We can get away with a single array (holding one row's worth of errors)
- * by using it to store the current row's errors at pixel columns not yet
- * processed, but the next row's errors at columns already processed.  We
- * need only a few extra variables to hold the errors immediately around the
- * current column.  (If we are lucky, those variables are in registers, but
- * even if not, they're probably cheaper to access than array elements are.)
- *
- * The fserrors[] array has (#columns + 2) entries; the extra entry at
- * each end saves us from special-casing the first and last pixels.
- * Each entry is three values long, one value for each color component.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
- */
-
-#if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
-#else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
-#endif
-
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
-
-
-/* Private subobject */
-
-typedef struct {
-  struct jpeg_color_quantizer pub; /* public fields */
-
-  /* Space for the eventually created colormap is stashed here */
-  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
-  int desired;			/* desired # of colors = size of colormap */
-
-  /* Variables for accumulating image statistics */
-  hist3d histogram;		/* pointer to the histogram */
-
-  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
-
-  /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors;		/* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-  int * error_limiter;		/* table for clamping the applied error */
-} my_cquantizer;
-
-typedef my_cquantizer * my_cquantize_ptr;
-
-
-/*
- * Prescan some rows of pixels.
- * In this module the prescan simply updates the histogram, which has been
- * initialized to zeroes by start_pass.
- * An output_buf parameter is required by the method signature, but no data
- * is actually output (in fact the buffer controller is probably passing a
- * NULL pointer).
- */
-
-METHODDEF(void)
-prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		  JSAMPARRAY output_buf, int num_rows)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register JSAMPROW ptr;
-  register histptr histp;
-  register hist3d histogram = cquantize->histogram;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    ptr = input_buf[row];
-    for (col = width; col > 0; col--) {
-      /* get pixel value and index into the histogram */
-      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
-			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
-			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
-      /* increment, check for overflow and undo increment if so. */
-      if (++(*histp) <= 0)
-	(*histp)--;
-      ptr += 3;
-    }
-  }
-}
-
-
-/*
- * Next we have the really interesting routines: selection of a colormap
- * given the completed histogram.
- * These routines work with a list of "boxes", each representing a rectangular
- * subset of the input color space (to histogram precision).
- */
-
-typedef struct {
-  /* The bounds of the box (inclusive); expressed as histogram indexes */
-  int c0min, c0max;
-  int c1min, c1max;
-  int c2min, c2max;
-  /* The volume (actually 2-norm) of the box */
-  INT32 volume;
-  /* The number of nonzero histogram cells within this box */
-  long colorcount;
-} box;
-
-typedef box * boxptr;
-
-
-LOCAL(boxptr)
-find_biggest_color_pop (boxptr boxlist, int numboxes)
-/* Find the splittable box with the largest color population */
-/* Returns NULL if no splittable boxes remain */
-{
-  register boxptr boxp;
-  register int i;
-  register long maxc = 0;
-  boxptr which = NULL;
-  
-  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    if (boxp->colorcount > maxc && boxp->volume > 0) {
-      which = boxp;
-      maxc = boxp->colorcount;
-    }
-  }
-  return which;
-}
-
-
-LOCAL(boxptr)
-find_biggest_volume (boxptr boxlist, int numboxes)
-/* Find the splittable box with the largest (scaled) volume */
-/* Returns NULL if no splittable boxes remain */
-{
-  register boxptr boxp;
-  register int i;
-  register INT32 maxv = 0;
-  boxptr which = NULL;
-  
-  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    if (boxp->volume > maxv) {
-      which = boxp;
-      maxv = boxp->volume;
-    }
-  }
-  return which;
-}
-
-
-LOCAL(void)
-update_box (j_decompress_ptr cinfo, boxptr boxp)
-/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
-/* and recompute its volume and population */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  histptr histp;
-  int c0,c1,c2;
-  int c0min,c0max,c1min,c1max,c2min,c2max;
-  INT32 dist0,dist1,dist2;
-  long ccount;
-  
-  c0min = boxp->c0min;  c0max = boxp->c0max;
-  c1min = boxp->c1min;  c1max = boxp->c1max;
-  c2min = boxp->c2min;  c2max = boxp->c2max;
-  
-  if (c0max > c0min)
-    for (c0 = c0min; c0 <= c0max; c0++)
-      for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0min = c0min = c0;
-	    goto have_c0min;
-	  }
-      }
- have_c0min:
-  if (c0max > c0min)
-    for (c0 = c0max; c0 >= c0min; c0--)
-      for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0max = c0max = c0;
-	    goto have_c0max;
-	  }
-      }
- have_c0max:
-  if (c1max > c1min)
-    for (c1 = c1min; c1 <= c1max; c1++)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1min = c1min = c1;
-	    goto have_c1min;
-	  }
-      }
- have_c1min:
-  if (c1max > c1min)
-    for (c1 = c1max; c1 >= c1min; c1--)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1max = c1max = c1;
-	    goto have_c1max;
-	  }
-      }
- have_c1max:
-  if (c2max > c2min)
-    for (c2 = c2min; c2 <= c2max; c2++)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2min = c2min = c2;
-	    goto have_c2min;
-	  }
-      }
- have_c2min:
-  if (c2max > c2min)
-    for (c2 = c2max; c2 >= c2min; c2--)
-      for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2max = c2max = c2;
-	    goto have_c2max;
-	  }
-      }
- have_c2max:
-
-  /* Update box volume.
-   * We use 2-norm rather than real volume here; this biases the method
-   * against making long narrow boxes, and it has the side benefit that
-   * a box is splittable iff norm > 0.
-   * Since the differences are expressed in histogram-cell units,
-   * we have to shift back to JSAMPLE units to get consistent distances;
-   * after which, we scale according to the selected distance scale factors.
-   */
-  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
-  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
-  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
-  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
-  
-  /* Now scan remaining volume of box and compute population */
-  ccount = 0;
-  for (c0 = c0min; c0 <= c0max; c0++)
-    for (c1 = c1min; c1 <= c1max; c1++) {
-      histp = & histogram[c0][c1][c2min];
-      for (c2 = c2min; c2 <= c2max; c2++, histp++)
-	if (*histp != 0) {
-	  ccount++;
-	}
-    }
-  boxp->colorcount = ccount;
-}
-
-
-LOCAL(int)
-median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
-	    int desired_colors)
-/* Repeatedly select and split the largest box until we have enough boxes */
-{
-  int n,lb;
-  int c0,c1,c2,cmax;
-  register boxptr b1,b2;
-
-  while (numboxes < desired_colors) {
-    /* Select box to split.
-     * Current algorithm: by population for first half, then by volume.
-     */
-    if (numboxes*2 <= desired_colors) {
-      b1 = find_biggest_color_pop(boxlist, numboxes);
-    } else {
-      b1 = find_biggest_volume(boxlist, numboxes);
-    }
-    if (b1 == NULL)		/* no splittable boxes left! */
-      break;
-    b2 = &boxlist[numboxes];	/* where new box will go */
-    /* Copy the color bounds to the new box. */
-    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
-    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
-    /* Choose which axis to split the box on.
-     * Current algorithm: longest scaled axis.
-     * See notes in update_box about scaling distances.
-     */
-    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
-    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
-    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
-    /* We want to break any ties in favor of green, then red, blue last.
-     * This code does the right thing for R,G,B or B,G,R color orders only.
-     */
-#if RGB_RED == 0
-    cmax = c1; n = 1;
-    if (c0 > cmax) { cmax = c0; n = 0; }
-    if (c2 > cmax) { n = 2; }
-#else
-    cmax = c1; n = 1;
-    if (c2 > cmax) { cmax = c2; n = 2; }
-    if (c0 > cmax) { n = 0; }
-#endif
-    /* Choose split point along selected axis, and update box bounds.
-     * Current algorithm: split at halfway point.
-     * (Since the box has been shrunk to minimum volume,
-     * any split will produce two nonempty subboxes.)
-     * Note that lb value is max for lower box, so must be < old max.
-     */
-    switch (n) {
-    case 0:
-      lb = (b1->c0max + b1->c0min) / 2;
-      b1->c0max = lb;
-      b2->c0min = lb+1;
-      break;
-    case 1:
-      lb = (b1->c1max + b1->c1min) / 2;
-      b1->c1max = lb;
-      b2->c1min = lb+1;
-      break;
-    case 2:
-      lb = (b1->c2max + b1->c2min) / 2;
-      b1->c2max = lb;
-      b2->c2min = lb+1;
-      break;
-    }
-    /* Update stats for boxes */
-    update_box(cinfo, b1);
-    update_box(cinfo, b2);
-    numboxes++;
-  }
-  return numboxes;
-}
-
-
-LOCAL(void)
-compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
-/* Compute representative color for a box, put it in colormap[icolor] */
-{
-  /* Current algorithm: mean weighted by pixels (not colors) */
-  /* Note it is important to get the rounding correct! */
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  histptr histp;
-  int c0,c1,c2;
-  int c0min,c0max,c1min,c1max,c2min,c2max;
-  long count;
-  long total = 0;
-  long c0total = 0;
-  long c1total = 0;
-  long c2total = 0;
-  
-  c0min = boxp->c0min;  c0max = boxp->c0max;
-  c1min = boxp->c1min;  c1max = boxp->c1max;
-  c2min = boxp->c2min;  c2max = boxp->c2max;
-  
-  for (c0 = c0min; c0 <= c0max; c0++)
-    for (c1 = c1min; c1 <= c1max; c1++) {
-      histp = & histogram[c0][c1][c2min];
-      for (c2 = c2min; c2 <= c2max; c2++) {
-	if ((count = *histp++) != 0) {
-	  total += count;
-	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
-	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
-	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
-	}
-      }
-    }
-  
-  cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
-  cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
-  cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
-}
-
-
-LOCAL(void)
-select_colors (j_decompress_ptr cinfo, int desired_colors)
-/* Master routine for color selection */
-{
-  boxptr boxlist;
-  int numboxes;
-  int i;
-
-  /* Allocate workspace for box list */
-  boxlist = (boxptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
-  /* Initialize one box containing whole space */
-  numboxes = 1;
-  boxlist[0].c0min = 0;
-  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
-  boxlist[0].c1min = 0;
-  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
-  boxlist[0].c2min = 0;
-  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
-  /* Shrink it to actually-used volume and set its statistics */
-  update_box(cinfo, & boxlist[0]);
-  /* Perform median-cut to produce final box list */
-  numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
-  /* Compute the representative color for each box, fill colormap */
-  for (i = 0; i < numboxes; i++)
-    compute_color(cinfo, & boxlist[i], i);
-  cinfo->actual_number_of_colors = numboxes;
-  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
-}
-
-
-/*
- * These routines are concerned with the time-critical task of mapping input
- * colors to the nearest color in the selected colormap.
- *
- * We re-use the histogram space as an "inverse color map", essentially a
- * cache for the results of nearest-color searches.  All colors within a
- * histogram cell will be mapped to the same colormap entry, namely the one
- * closest to the cell's center.  This may not be quite the closest entry to
- * the actual input color, but it's almost as good.  A zero in the cache
- * indicates we haven't found the nearest color for that cell yet; the array
- * is cleared to zeroes before starting the mapping pass.  When we find the
- * nearest color for a cell, its colormap index plus one is recorded in the
- * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
- * when they need to use an unfilled entry in the cache.
- *
- * Our method of efficiently finding nearest colors is based on the "locally
- * sorted search" idea described by Heckbert and on the incremental distance
- * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
- * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
- * the distances from a given colormap entry to each cell of the histogram can
- * be computed quickly using an incremental method: the differences between
- * distances to adjacent cells themselves differ by a constant.  This allows a
- * fairly fast implementation of the "brute force" approach of computing the
- * distance from every colormap entry to every histogram cell.  Unfortunately,
- * it needs a work array to hold the best-distance-so-far for each histogram
- * cell (because the inner loop has to be over cells, not colormap entries).
- * The work array elements have to be INT32s, so the work array would need
- * 256Kb at our recommended precision.  This is not feasible in DOS machines.
- *
- * To get around these problems, we apply Thomas' method to compute the
- * nearest colors for only the cells within a small subbox of the histogram.
- * The work array need be only as big as the subbox, so the memory usage
- * problem is solved.  Furthermore, we need not fill subboxes that are never
- * referenced in pass2; many images use only part of the color gamut, so a
- * fair amount of work is saved.  An additional advantage of this
- * approach is that we can apply Heckbert's locality criterion to quickly
- * eliminate colormap entries that are far away from the subbox; typically
- * three-fourths of the colormap entries are rejected by Heckbert's criterion,
- * and we need not compute their distances to individual cells in the subbox.
- * The speed of this approach is heavily influenced by the subbox size: too
- * small means too much overhead, too big loses because Heckbert's criterion
- * can't eliminate as many colormap entries.  Empirically the best subbox
- * size seems to be about 1/512th of the histogram (1/8th in each direction).
- *
- * Thomas' article also describes a refined method which is asymptotically
- * faster than the brute-force method, but it is also far more complex and
- * cannot efficiently be applied to small subboxes.  It is therefore not
- * useful for programs intended to be portable to DOS machines.  On machines
- * with plenty of memory, filling the whole histogram in one shot with Thomas'
- * refined method might be faster than the present code --- but then again,
- * it might not be any faster, and it's certainly more complicated.
- */
-
-
-/* log2(histogram cells in update box) for each axis; this can be adjusted */
-#define BOX_C0_LOG  (HIST_C0_BITS-3)
-#define BOX_C1_LOG  (HIST_C1_BITS-3)
-#define BOX_C2_LOG  (HIST_C2_BITS-3)
-
-#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
-#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
-#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
-
-#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
-#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
-#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
-
-
-/*
- * The next three routines implement inverse colormap filling.  They could
- * all be folded into one big routine, but splitting them up this way saves
- * some stack space (the mindist[] and bestdist[] arrays need not coexist)
- * and may allow some compilers to produce better code by registerizing more
- * inner-loop variables.
- */
-
-LOCAL(int)
-find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		    JSAMPLE colorlist[])
-/* Locate the colormap entries close enough to an update box to be candidates
- * for the nearest entry to some cell(s) in the update box.  The update box
- * is specified by the center coordinates of its first cell.  The number of
- * candidate colormap entries is returned, and their colormap indexes are
- * placed in colorlist[].
- * This routine uses Heckbert's "locally sorted search" criterion to select
- * the colors that need further consideration.
- */
-{
-  int numcolors = cinfo->actual_number_of_colors;
-  int maxc0, maxc1, maxc2;
-  int centerc0, centerc1, centerc2;
-  int i, x, ncolors;
-  INT32 minmaxdist, min_dist, max_dist, tdist;
-  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
-
-  /* Compute true coordinates of update box's upper corner and center.
-   * Actually we compute the coordinates of the center of the upper-corner
-   * histogram cell, which are the upper bounds of the volume we care about.
-   * Note that since ">>" rounds down, the "center" values may be closer to
-   * min than to max; hence comparisons to them must be "<=", not "<".
-   */
-  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
-  centerc0 = (minc0 + maxc0) >> 1;
-  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
-  centerc1 = (minc1 + maxc1) >> 1;
-  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
-  centerc2 = (minc2 + maxc2) >> 1;
-
-  /* For each color in colormap, find:
-   *  1. its minimum squared-distance to any point in the update box
-   *     (zero if color is within update box);
-   *  2. its maximum squared-distance to any point in the update box.
-   * Both of these can be found by considering only the corners of the box.
-   * We save the minimum distance for each color in mindist[];
-   * only the smallest maximum distance is of interest.
-   */
-  minmaxdist = 0x7FFFFFFFL;
-
-  for (i = 0; i < numcolors; i++) {
-    /* We compute the squared-c0-distance term, then add in the other two. */
-    x = GETJSAMPLE(cinfo->colormap[0][i]);
-    if (x < minc0) {
-      tdist = (x - minc0) * C0_SCALE;
-      min_dist = tdist*tdist;
-      tdist = (x - maxc0) * C0_SCALE;
-      max_dist = tdist*tdist;
-    } else if (x > maxc0) {
-      tdist = (x - maxc0) * C0_SCALE;
-      min_dist = tdist*tdist;
-      tdist = (x - minc0) * C0_SCALE;
-      max_dist = tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      min_dist = 0;
-      if (x <= centerc0) {
-	tdist = (x - maxc0) * C0_SCALE;
-	max_dist = tdist*tdist;
-      } else {
-	tdist = (x - minc0) * C0_SCALE;
-	max_dist = tdist*tdist;
-      }
-    }
-
-    x = GETJSAMPLE(cinfo->colormap[1][i]);
-    if (x < minc1) {
-      tdist = (x - minc1) * C1_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - maxc1) * C1_SCALE;
-      max_dist += tdist*tdist;
-    } else if (x > maxc1) {
-      tdist = (x - maxc1) * C1_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - minc1) * C1_SCALE;
-      max_dist += tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      if (x <= centerc1) {
-	tdist = (x - maxc1) * C1_SCALE;
-	max_dist += tdist*tdist;
-      } else {
-	tdist = (x - minc1) * C1_SCALE;
-	max_dist += tdist*tdist;
-      }
-    }
-
-    x = GETJSAMPLE(cinfo->colormap[2][i]);
-    if (x < minc2) {
-      tdist = (x - minc2) * C2_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - maxc2) * C2_SCALE;
-      max_dist += tdist*tdist;
-    } else if (x > maxc2) {
-      tdist = (x - maxc2) * C2_SCALE;
-      min_dist += tdist*tdist;
-      tdist = (x - minc2) * C2_SCALE;
-      max_dist += tdist*tdist;
-    } else {
-      /* within cell range so no contribution to min_dist */
-      if (x <= centerc2) {
-	tdist = (x - maxc2) * C2_SCALE;
-	max_dist += tdist*tdist;
-      } else {
-	tdist = (x - minc2) * C2_SCALE;
-	max_dist += tdist*tdist;
-      }
-    }
-
-    mindist[i] = min_dist;	/* save away the results */
-    if (max_dist < minmaxdist)
-      minmaxdist = max_dist;
-  }
-
-  /* Now we know that no cell in the update box is more than minmaxdist
-   * away from some colormap entry.  Therefore, only colors that are
-   * within minmaxdist of some part of the box need be considered.
-   */
-  ncolors = 0;
-  for (i = 0; i < numcolors; i++) {
-    if (mindist[i] <= minmaxdist)
-      colorlist[ncolors++] = (JSAMPLE) i;
-  }
-  return ncolors;
-}
-
-
-LOCAL(void)
-find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
-/* Find the closest colormap entry for each cell in the update box,
- * given the list of candidate colors prepared by find_nearby_colors.
- * Return the indexes of the closest entries in the bestcolor[] array.
- * This routine uses Thomas' incremental distance calculation method to
- * find the distance from a colormap entry to successive cells in the box.
- */
-{
-  int ic0, ic1, ic2;
-  int i, icolor;
-  register INT32 * bptr;	/* pointer into bestdist[] array */
-  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
-  INT32 dist0, dist1;		/* initial distance values */
-  register INT32 dist2;		/* current distance in inner loop */
-  INT32 xx0, xx1;		/* distance increments */
-  register INT32 xx2;
-  INT32 inc0, inc1, inc2;	/* initial values for increments */
-  /* This array holds the distance to the nearest-so-far color for each cell */
-  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
-
-  /* Initialize best-distance for each cell of the update box */
-  bptr = bestdist;
-  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
-    *bptr++ = 0x7FFFFFFFL;
-  
-  /* For each color selected by find_nearby_colors,
-   * compute its distance to the center of each cell in the box.
-   * If that's less than best-so-far, update best distance and color number.
-   */
-  
-  /* Nominal steps between cell centers ("x" in Thomas article) */
-#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
-#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
-#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
-  
-  for (i = 0; i < numcolors; i++) {
-    icolor = GETJSAMPLE(colorlist[i]);
-    /* Compute (square of) distance from minc0/c1/c2 to this color */
-    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
-    dist0 = inc0*inc0;
-    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
-    dist0 += inc1*inc1;
-    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
-    dist0 += inc2*inc2;
-    /* Form the initial difference increments */
-    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
-    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
-    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
-    /* Now loop over all cells in box, updating distance per Thomas method */
-    bptr = bestdist;
-    cptr = bestcolor;
-    xx0 = inc0;
-    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
-      dist1 = dist0;
-      xx1 = inc1;
-      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
-	dist2 = dist1;
-	xx2 = inc2;
-	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
-	  if (dist2 < *bptr) {
-	    *bptr = dist2;
-	    *cptr = (JSAMPLE) icolor;
-	  }
-	  dist2 += xx2;
-	  xx2 += 2 * STEP_C2 * STEP_C2;
-	  bptr++;
-	  cptr++;
-	}
-	dist1 += xx1;
-	xx1 += 2 * STEP_C1 * STEP_C1;
-      }
-      dist0 += xx0;
-      xx0 += 2 * STEP_C0 * STEP_C0;
-    }
-  }
-}
-
-
-LOCAL(void)
-fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
-/* Fill the inverse-colormap entries in the update box that contains */
-/* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
-/* we can fill as many others as we wish.) */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  int minc0, minc1, minc2;	/* lower left corner of update box */
-  int ic0, ic1, ic2;
-  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
-  register histptr cachep;	/* pointer into main cache array */
-  /* This array lists the candidate colormap indexes. */
-  JSAMPLE colorlist[MAXNUMCOLORS];
-  int numcolors;		/* number of candidate colors */
-  /* This array holds the actually closest colormap index for each cell. */
-  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
-
-  /* Convert cell coordinates to update box ID */
-  c0 >>= BOX_C0_LOG;
-  c1 >>= BOX_C1_LOG;
-  c2 >>= BOX_C2_LOG;
-
-  /* Compute true coordinates of update box's origin corner.
-   * Actually we compute the coordinates of the center of the corner
-   * histogram cell, which are the lower bounds of the volume we care about.
-   */
-  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
-  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
-  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
-  
-  /* Determine which colormap entries are close enough to be candidates
-   * for the nearest entry to some cell in the update box.
-   */
-  numcolors = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
-
-  /* Determine the actually nearest colors. */
-  find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
-		   bestcolor);
-
-  /* Save the best color numbers (plus 1) in the main cache array */
-  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
-  c1 <<= BOX_C1_LOG;
-  c2 <<= BOX_C2_LOG;
-  cptr = bestcolor;
-  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
-    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
-      cachep = & histogram[c0+ic0][c1+ic1][c2];
-      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
-	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
-      }
-    }
-  }
-}
-
-
-/*
- * Map some rows of pixels to the output colormapped representation.
- */
-
-METHODDEF(void)
-pass2_no_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
-/* This version performs no dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  register JSAMPROW inptr, outptr;
-  register histptr cachep;
-  register int c0, c1, c2;
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-
-  for (row = 0; row < num_rows; row++) {
-    inptr = input_buf[row];
-    outptr = output_buf[row];
-    for (col = width; col > 0; col--) {
-      /* get pixel value and index into the cache */
-      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
-      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
-      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
-      cachep = & histogram[c0][c1][c2];
-      /* If we have not seen this color before, find nearest colormap entry */
-      /* and update the cache */
-      if (*cachep == 0)
-	fill_inverse_cmap(cinfo, c0,c1,c2);
-      /* Now emit the colormap index for this cell */
-      *outptr++ = (JSAMPLE) (*cachep - 1);
-    }
-  }
-}
-
-
-METHODDEF(void)
-pass2_fs_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
-/* This version performs Floyd-Steinberg dithering */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
-  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
-  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  JSAMPROW inptr;		/* => current input pixel */
-  JSAMPROW outptr;		/* => current output pixel */
-  histptr cachep;
-  int dir;			/* +1 or -1 depending on direction */
-  int dir3;			/* 3*dir, for advancing inptr & errorptr */
-  int row;
-  JDIMENSION col;
-  JDIMENSION width = cinfo->output_width;
-  JSAMPLE *range_limit = cinfo->sample_range_limit;
-  int *error_limit = cquantize->error_limiter;
-  JSAMPROW colormap0 = cinfo->colormap[0];
-  JSAMPROW colormap1 = cinfo->colormap[1];
-  JSAMPROW colormap2 = cinfo->colormap[2];
-  SHIFT_TEMPS
-
-  for (row = 0; row < num_rows; row++) {
-    inptr = input_buf[row];
-    outptr = output_buf[row];
-    if (cquantize->on_odd_row) {
-      /* work right to left in this row */
-      inptr += (width-1) * 3;	/* so point to rightmost pixel */
-      outptr += width-1;
-      dir = -1;
-      dir3 = -3;
-      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
-      cquantize->on_odd_row = FALSE; /* flip for next time */
-    } else {
-      /* work left to right in this row */
-      dir = 1;
-      dir3 = 3;
-      errorptr = cquantize->fserrors; /* => entry before first real column */
-      cquantize->on_odd_row = TRUE; /* flip for next time */
-    }
-    /* Preset error values: no error propagated to first pixel from left */
-    cur0 = cur1 = cur2 = 0;
-    /* and no error propagated to row below yet */
-    belowerr0 = belowerr1 = belowerr2 = 0;
-    bpreverr0 = bpreverr1 = bpreverr2 = 0;
-
-    for (col = width; col > 0; col--) {
-      /* curN holds the error propagated from the previous pixel on the
-       * current line.  Add the error propagated from the previous line
-       * to form the complete error correction term for this pixel, and
-       * round the error term (which is expressed * 16) to an integer.
-       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-       * for either sign of the error value.
-       * Note: errorptr points to *previous* column's array entry.
-       */
-      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
-      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
-      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
-      /* Limit the error using transfer function set by init_error_limit.
-       * See comments with init_error_limit for rationale.
-       */
-      cur0 = error_limit[cur0];
-      cur1 = error_limit[cur1];
-      cur2 = error_limit[cur2];
-      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
-       * this sets the required size of the range_limit array.
-       */
-      cur0 += GETJSAMPLE(inptr[0]);
-      cur1 += GETJSAMPLE(inptr[1]);
-      cur2 += GETJSAMPLE(inptr[2]);
-      cur0 = GETJSAMPLE(range_limit[cur0]);
-      cur1 = GETJSAMPLE(range_limit[cur1]);
-      cur2 = GETJSAMPLE(range_limit[cur2]);
-      /* Index into the cache with adjusted pixel value */
-      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
-      /* If we have not seen this color before, find nearest colormap */
-      /* entry and update the cache */
-      if (*cachep == 0)
-	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
-      /* Now emit the colormap index for this cell */
-      { register int pixcode = *cachep - 1;
-	*outptr = (JSAMPLE) pixcode;
-	/* Compute representation error for this pixel */
-	cur0 -= GETJSAMPLE(colormap0[pixcode]);
-	cur1 -= GETJSAMPLE(colormap1[pixcode]);
-	cur2 -= GETJSAMPLE(colormap2[pixcode]);
-      }
-      /* Compute error fractions to be propagated to adjacent pixels.
-       * Add these into the running sums, and simultaneously shift the
-       * next-line error sums left by 1 column.
-       */
-      { register LOCFSERROR bnexterr, delta;
-
-	bnexterr = cur0;	/* Process component 0 */
-	delta = cur0 * 2;
-	cur0 += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
-	cur0 += delta;		/* form error * 5 */
-	bpreverr0 = belowerr0 + cur0;
-	belowerr0 = bnexterr;
-	cur0 += delta;		/* form error * 7 */
-	bnexterr = cur1;	/* Process component 1 */
-	delta = cur1 * 2;
-	cur1 += delta;		/* form error * 3 */
-	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
-	cur1 += delta;		/* form error * 5 */
-	bpreverr1 = belowerr1 + cur1;
-	belowerr1 = bnexterr;
-	cur1 += delta;		/* form error * 7 */
-	bnexterr = cur2;	/* Process component 2 */
-	delta = cur2 * 2;
-	cur2 += delta;		/* form error * 3 */
-	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
-	cur2 += delta;		/* form error * 5 */
-	bpreverr2 = belowerr2 + cur2;
-	belowerr2 = bnexterr;
-	cur2 += delta;		/* form error * 7 */
-      }
-      /* At this point curN contains the 7/16 error value to be propagated
-       * to the next pixel on the current line, and all the errors for the
-       * next line have been shifted over.  We are therefore ready to move on.
-       */
-      inptr += dir3;		/* Advance pixel pointers to next column */
-      outptr += dir;
-      errorptr += dir3;		/* advance errorptr to current column */
-    }
-    /* Post-loop cleanup: we must unload the final error values into the
-     * final fserrors[] entry.  Note we need not unload belowerrN because
-     * it is for the dummy column before or after the actual array.
-     */
-    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
-    errorptr[1] = (FSERROR) bpreverr1;
-    errorptr[2] = (FSERROR) bpreverr2;
-  }
-}
-
-
-/*
- * Initialize the error-limiting transfer function (lookup table).
- * The raw F-S error computation can potentially compute error values of up to
- * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
- * much less, otherwise obviously wrong pixels will be created.  (Typical
- * effects include weird fringes at color-area boundaries, isolated bright
- * pixels in a dark area, etc.)  The standard advice for avoiding this problem
- * is to ensure that the "corners" of the color cube are allocated as output
- * colors; then repeated errors in the same direction cannot cause cascading
- * error buildup.  However, that only prevents the error from getting
- * completely out of hand; Aaron Giles reports that error limiting improves
- * the results even with corner colors allocated.
- * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
- * well, but the smoother transfer function used below is even better.  Thanks
- * to Aaron Giles for this idea.
- */
-
-LOCAL(void)
-init_error_limit (j_decompress_ptr cinfo)
-/* Allocate and fill in the error_limiter table */
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  int * table;
-  int in, out;
-
-  table = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
-  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
-  cquantize->error_limiter = table;
-
-#define STEPSIZE ((MAXJSAMPLE+1)/16)
-  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
-  out = 0;
-  for (in = 0; in < STEPSIZE; in++, out++) {
-    table[in] = out; table[-in] = -out;
-  }
-  /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
-  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
-    table[in] = out; table[-in] = -out;
-  }
-  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
-  for (; in <= MAXJSAMPLE; in++) {
-    table[in] = out; table[-in] = -out;
-  }
-#undef STEPSIZE
-}
-
-
-/*
- * Finish up at the end of each pass.
- */
-
-METHODDEF(void)
-finish_pass1 (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-
-  /* Select the representative colors and fill in cinfo->colormap */
-  cinfo->colormap = cquantize->sv_colormap;
-  select_colors(cinfo, cquantize->desired);
-  /* Force next pass to zero the color index table */
-  cquantize->needs_zeroed = TRUE;
-}
-
-
-METHODDEF(void)
-finish_pass2 (j_decompress_ptr cinfo)
-{
-  /* no work */
-}
-
-
-/*
- * Initialize for each processing pass.
- */
-
-METHODDEF(void)
-start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  hist3d histogram = cquantize->histogram;
-  int i;
-
-  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
-  if (cinfo->dither_mode != JDITHER_NONE)
-    cinfo->dither_mode = JDITHER_FS;
-
-  if (is_pre_scan) {
-    /* Set up method pointers */
-    cquantize->pub.color_quantize = prescan_quantize;
-    cquantize->pub.finish_pass = finish_pass1;
-    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
-  } else {
-    /* Set up method pointers */
-    if (cinfo->dither_mode == JDITHER_FS)
-      cquantize->pub.color_quantize = pass2_fs_dither;
-    else
-      cquantize->pub.color_quantize = pass2_no_dither;
-    cquantize->pub.finish_pass = finish_pass2;
-
-    /* Make sure color count is acceptable */
-    i = cinfo->actual_number_of_colors;
-    if (i < 1)
-      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
-    if (i > MAXNUMCOLORS)
-      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
-
-    if (cinfo->dither_mode == JDITHER_FS) {
-      size_t arraysize = ((size_t) cinfo->output_width + (size_t) 2)
-	* (3 * SIZEOF(FSERROR));
-      /* Allocate Floyd-Steinberg workspace if we didn't already. */
-      if (cquantize->fserrors == NULL)
-	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
-      /* Initialize the propagated errors to zero. */
-      FMEMZERO((void FAR *) cquantize->fserrors, arraysize);
-      /* Make the error-limit table if we didn't already. */
-      if (cquantize->error_limiter == NULL)
-	init_error_limit(cinfo);
-      cquantize->on_odd_row = FALSE;
-    }
-
-  }
-  /* Zero the histogram or inverse color map, if necessary */
-  if (cquantize->needs_zeroed) {
-    for (i = 0; i < HIST_C0_ELEMS; i++) {
-      FMEMZERO((void FAR *) histogram[i],
-	       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
-    }
-    cquantize->needs_zeroed = FALSE;
-  }
-}
-
-
-/*
- * Switch to a new external colormap between output passes.
- */
-
-METHODDEF(void)
-new_color_map_2_quant (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-
-  /* Reset the inverse color map */
-  cquantize->needs_zeroed = TRUE;
-}
-
-
-/*
- * Module initialization routine for 2-pass color quantization.
- */
-
-GLOBAL(void)
-jinit_2pass_quantizer (j_decompress_ptr cinfo)
-{
-  my_cquantize_ptr cquantize;
-  int i;
-
-  cquantize = (my_cquantize_ptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_cquantizer));
-  cinfo->cquantize = &cquantize->pub;
-  cquantize->pub.start_pass = start_pass_2_quant;
-  cquantize->pub.new_color_map = new_color_map_2_quant;
-  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
-  cquantize->error_limiter = NULL;
-
-  /* Make sure jdmaster didn't give me a case I can't handle */
-  if (cinfo->out_color_components != 3)
-    ERREXIT(cinfo, JERR_NOTIMPL);
-
-  /* Allocate the histogram/inverse colormap storage */
-  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
-  for (i = 0; i < HIST_C0_ELEMS; i++) {
-    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
-  }
-  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
-
-  /* Allocate storage for the completed colormap, if required.
-   * We do this now since it is FAR storage and may affect
-   * the memory manager's space calculations.
-   */
-  if (cinfo->enable_2pass_quant) {
-    /* Make sure color count is acceptable */
-    int desired = cinfo->desired_number_of_colors;
-    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
-    if (desired < 8)
-      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
-    /* Make sure colormap indexes can be represented by JSAMPLEs */
-    if (desired > MAXNUMCOLORS)
-      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
-    cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (JDIMENSION) desired, (JDIMENSION) 3);
-    cquantize->desired = desired;
-  } else
-    cquantize->sv_colormap = NULL;
-
-  /* Only F-S dithering or no dithering is supported. */
-  /* If user asks for ordered dither, give him F-S. */
-  if (cinfo->dither_mode != JDITHER_NONE)
-    cinfo->dither_mode = JDITHER_FS;
-
-  /* Allocate Floyd-Steinberg workspace if necessary.
-   * This isn't really needed until pass 2, but again it is FAR storage.
-   * Although we will cope with a later change in dither_mode,
-   * we do not promise to honor max_memory_to_use if dither_mode changes.
-   */
-  if (cinfo->dither_mode == JDITHER_FS) {
-    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-      ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       ((size_t) cinfo->output_width + (size_t) 2) * (3 * SIZEOF(FSERROR)));
-    /* Might as well create the error-limiting table too. */
-    init_error_limit(cinfo);
-  }
-}
-
-#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/cmake/externals/libjpeg/jutils.c b/cmake/externals/libjpeg/jutils.c
deleted file mode 100644
index 31e16dfb5..000000000
--- a/cmake/externals/libjpeg/jutils.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * jutils.c
- *
- * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2009-2020 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains tables and miscellaneous utility routines needed
- * for both compression and decompression.
- * Note we prefix all global names with "j" to minimize conflicts with
- * a surrounding application.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-
-
-/*
- * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
- * of a DCT block read in natural order (left to right, top to bottom).
- */
-
-#if 0				/* This table is not actually needed in v6a */
-
-const int jpeg_zigzag_order[DCTSIZE2] = {
-   0,  1,  5,  6, 14, 15, 27, 28,
-   2,  4,  7, 13, 16, 26, 29, 42,
-   3,  8, 12, 17, 25, 30, 41, 43,
-   9, 11, 18, 24, 31, 40, 44, 53,
-  10, 19, 23, 32, 39, 45, 52, 54,
-  20, 22, 33, 38, 46, 51, 55, 60,
-  21, 34, 37, 47, 50, 56, 59, 61,
-  35, 36, 48, 49, 57, 58, 62, 63
-};
-
-#endif
-
-/*
- * jpeg_natural_order[i] is the natural-order position of the i'th element
- * of zigzag order.
- *
- * When reading corrupted data, the Huffman decoders could attempt
- * to reference an entry beyond the end of this array (if the decoded
- * zero run length reaches past the end of the block).  To prevent
- * wild stores without adding an inner-loop test, we put some extra
- * "63"s after the real entries.  This will cause the extra coefficient
- * to be stored in location 63 of the block, not somewhere random.
- * The worst case would be a run-length of 15, which means we need 16
- * fake entries.
- */
-
-const int jpeg_natural_order[DCTSIZE2+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4,  5,
-  12, 19, 26, 33, 40, 48, 41, 34,
-  27, 20, 13,  6,  7, 14, 21, 28,
-  35, 42, 49, 56, 57, 50, 43, 36,
-  29, 22, 15, 23, 30, 37, 44, 51,
-  58, 59, 52, 45, 38, 31, 39, 46,
-  53, 60, 61, 54, 47, 55, 62, 63,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order7[7*7+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4,  5,
-  12, 19, 26, 33, 40, 48, 41, 34,
-  27, 20, 13,  6, 14, 21, 28, 35,
-  42, 49, 50, 43, 36, 29, 22, 30,
-  37, 44, 51, 52, 45, 38, 46, 53,
-  54,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order6[6*6+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4,  5,
-  12, 19, 26, 33, 40, 41, 34, 27,
-  20, 13, 21, 28, 35, 42, 43, 36,
-  29, 37, 44, 45,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order5[5*5+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 32, 25, 18, 11,  4, 12,
-  19, 26, 33, 34, 27, 20, 28, 35,
-  36,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order4[4*4+16] = {
-   0,  1,  8, 16,  9,  2,  3, 10,
-  17, 24, 25, 18, 11, 19, 26, 27,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order3[3*3+16] = {
-   0,  1,  8, 16,  9,  2, 10, 17,
-  18,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-const int jpeg_natural_order2[2*2+16] = {
-   0,  1,  8,  9,
-  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
-  63, 63, 63, 63, 63, 63, 63, 63
-};
-
-
-/*
- * Arithmetic utilities
- */
-
-GLOBAL(long)
-jdiv_round_up (long a, long b)
-/* Compute a/b rounded up to next integer, ie, ceil(a/b) */
-/* Assumes a >= 0, b > 0 */
-{
-  return (a + b - 1L) / b;
-}
-
-
-GLOBAL(long)
-jround_up (long a, long b)
-/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
-/* Assumes a >= 0, b > 0 */
-{
-  a += b - 1L;
-  return a - (a % b);
-}
-
-
-/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
- * and coefficient-block arrays.  This won't work on 80x86 because the arrays
- * are FAR and we're assuming a small-pointer memory model.  However, some
- * DOS compilers provide far-pointer versions of memcpy() and memset() even
- * in the small-model libraries.  These will be used if USE_FMEM is defined.
- * Otherwise, the routines below do it the hard way.  (The performance cost
- * is not all that great, because these routines aren't very heavily used.)
- */
-
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macro */
-#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
-#else				/* 80x86 case, define if we can */
-#ifdef USE_FMEM
-#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
-#else
-/* This function is for use by the FMEMZERO macro defined in jpegint.h.
- * Do not call this function directly, use the FMEMZERO macro instead.
- */
-GLOBAL(void)
-jzero_far (void FAR * target, size_t bytestozero)
-/* Zero out a chunk of FAR memory. */
-/* This might be sample-array data, block-array data, or alloc_large data. */
-{
-  register char FAR * ptr = (char FAR *) target;
-  register size_t count;
-
-  for (count = bytestozero; count > 0; count--) {
-    *ptr++ = 0;
-  }
-}
-#endif
-#endif
-
-
-GLOBAL(void)
-jcopy_sample_rows (JSAMPARRAY input_array,
-		   JSAMPARRAY output_array,
-		   int num_rows, JDIMENSION num_cols)
-/* Copy some rows of samples from one place to another.
- * num_rows rows are copied from *input_array++ to *output_array++;
- * these areas may overlap for duplication.
- * The source and destination arrays must be at least as wide as num_cols.
- */
-{
-  register JSAMPROW inptr, outptr;
-#ifdef FMEMCOPY
-  register size_t count = (size_t) num_cols * SIZEOF(JSAMPLE);
-#else
-  register JDIMENSION count;
-#endif
-  register int row;
-
-  for (row = num_rows; row > 0; row--) {
-    inptr = *input_array++;
-    outptr = *output_array++;
-#ifdef FMEMCOPY
-    FMEMCOPY(outptr, inptr, count);
-#else
-    for (count = num_cols; count > 0; count--)
-      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
-#endif
-  }
-}
-
-
-GLOBAL(void)
-jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
-		 JDIMENSION num_blocks)
-/* Copy a row of coefficient blocks from one place to another. */
-{
-#ifdef FMEMCOPY
-  FMEMCOPY(output_row, input_row, (size_t) num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
-#else
-  register JCOEFPTR inptr, outptr;
-  register long count;
-
-  inptr = (JCOEFPTR) input_row;
-  outptr = (JCOEFPTR) output_row;
-  for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) {
-    *outptr++ = *inptr++;
-  }
-#endif
-}
diff --git a/cmake/externals/libjpeg/jversion.h b/cmake/externals/libjpeg/jversion.h
deleted file mode 100644
index df53ef5e5..000000000
--- a/cmake/externals/libjpeg/jversion.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * jversion.h
- *
- * Copyright (C) 1991-2024, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains software version identification.
- */
-
-
-#define JVERSION	"9f  14-Jan-2024"
-
-#define JCOPYRIGHT	"Copyright (C) 2024, Thomas G. Lane, Guido Vollbeding"
diff --git a/cmake/externals/libspng/CMakeLists.txt b/cmake/externals/libspng/CMakeLists.txt
deleted file mode 100644
index c277888e5..000000000
--- a/cmake/externals/libspng/CMakeLists.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-# ----------------------------------------------------------------------------
-#  CMake file for libspng. See root CMakeLists.txt
-#
-# ----------------------------------------------------------------------------
-
-project(SPNG)
-
-set(CURR_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}")
-set_property(GLOBAL PROPERTY SPNG_INCLUDE_DIR ${CURR_INCLUDE_DIR})
-
-file(GLOB_RECURSE spng_headers RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "*.h")
-file(GLOB_RECURSE spng_sources RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "*.c")
-
-message(STATUS "libspng will be used as PNG codec")
-
-# ----------------------------------------------------------------------------------
-#         Define the library target:
-# ----------------------------------------------------------------------------------
-
-if(MSVC)
-    add_definitions(-D_CRT_SECURE_NO_DEPRECATE)
-endif(MSVC)
-
-add_library(SPNG STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${spng_headers} ${spng_sources})
-target_link_libraries(SPNG ${ZLIB_LIBRARIES})
-
-set_target_properties(SPNG
-        PROPERTIES OUTPUT_NAME SPNG
-        DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
-        COMPILE_PDB_NAME SPNG
-        COMPILE_PDB_NAME_DEBUG "SPNG${OPENCV_DEBUG_POSTFIX}"
-        ARCHIVE_OUTPUT_DIRECTORY "${3P_LIBRARY_OUTPUT_PATH}"
-        )
-
-target_compile_definitions(SPNG PUBLIC SPNG_STATIC)
-
-if(ENABLE_SOLUTION_FOLDERS)
-    set_target_properties(SPNG PROPERTIES FOLDER "3rdparty")
-endif()
\ No newline at end of file
diff --git a/cmake/externals/libspng/LICENSE b/cmake/externals/libspng/LICENSE
deleted file mode 100644
index a29d5a2fa..000000000
--- a/cmake/externals/libspng/LICENSE
+++ /dev/null
@@ -1,25 +0,0 @@
-BSD 2-Clause License
-
-Copyright (c) 2018-2023, Randy <randy408@protonmail.com>
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/cmake/externals/libspng/spng.c b/cmake/externals/libspng/spng.c
deleted file mode 100644
index b22b7110a..000000000
--- a/cmake/externals/libspng/spng.c
+++ /dev/null
@@ -1,6980 +0,0 @@
-/* SPDX-License-Identifier: (BSD-2-Clause AND libpng-2.0) */
-
-#define SPNG__BUILD
-
-#include "spng.h"
-
-#include <limits.h>
-#include <string.h>
-#include <stdio.h>
-#include <math.h>
-
-#define ZLIB_CONST
-
-#ifdef __FRAMAC__
-    #define SPNG_DISABLE_OPT
-    #include "tests/framac_stubs.h"
-#else
-    #ifdef SPNG_USE_MINIZ
-        #include <miniz.h>
-    #else
-        #include <zlib.h>
-    #endif
-#endif
-
-#ifdef SPNG_MULTITHREADING
-    #include <pthread.h>
-#endif
-
-/* Not build options, edit at your own risk! */
-#define SPNG_READ_SIZE (8192)
-#define SPNG_WRITE_SIZE SPNG_READ_SIZE
-#define SPNG_MAX_CHUNK_COUNT (1000)
-
-#define SPNG_TARGET_CLONES(x)
-
-#ifndef SPNG_DISABLE_OPT
-
-    #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
-        #define SPNG_X86
-
-        #if defined(__x86_64__) || defined(_M_X64)
-            #define SPNG_X86_64
-        #endif
-
-    #elif defined(__aarch64__) || defined(_M_ARM64) /* || defined(__ARM_NEON) */
-        #define SPNG_ARM /* NOTE: only arm64 builds are tested! */
-    #else
-        #pragma message "disabling SIMD optimizations for unknown target"
-        #define SPNG_DISABLE_OPT
-    #endif
-
-    #if defined(SPNG_X86_64) && defined(SPNG_ENABLE_TARGET_CLONES)
-        #undef SPNG_TARGET_CLONES
-        #define SPNG_TARGET_CLONES(x) __attribute__((target_clones(x)))
-    #else
-        #define SPNG_TARGET_CLONES(x)
-    #endif
-
-    #ifndef SPNG_DISABLE_OPT
-        static void defilter_sub3(size_t rowbytes, unsigned char *row);
-        static void defilter_sub4(size_t rowbytes, unsigned char *row);
-        static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev);
-        static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev);
-        static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev);
-        static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev);
-
-        #if defined(SPNG_ARM)
-        static uint32_t expand_palette_rgba8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width);
-        static uint32_t expand_palette_rgb8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width);
-        #endif
-    #endif
-#endif
-
-#if defined(_MSC_VER)
-    #pragma warning(push)
-    #pragma warning(disable: 4244)
-#endif
-
-#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || defined(__BIG_ENDIAN__)
-    #define SPNG_BIG_ENDIAN
-#else
-    #define SPNG_LITTLE_ENDIAN
-#endif
-
-enum spng_state
-{
-    SPNG_STATE_INVALID = 0,
-    SPNG_STATE_INIT = 1, /* No PNG buffer/stream is set */
-    SPNG_STATE_INPUT, /* Decoder input PNG was set */
-    SPNG_STATE_OUTPUT = SPNG_STATE_INPUT, /* Encoder output was set */
-    SPNG_STATE_IHDR, /* IHDR was read/written */
-    SPNG_STATE_FIRST_IDAT,  /* Encoded up to / reached first IDAT */
-    SPNG_STATE_DECODE_INIT, /* Decoder is ready for progressive reads */
-    SPNG_STATE_ENCODE_INIT = SPNG_STATE_DECODE_INIT,
-    SPNG_STATE_EOI, /* Reached the last scanline/row */
-    SPNG_STATE_LAST_IDAT, /* Reached last IDAT, set at end of decode_image() */
-    SPNG_STATE_AFTER_IDAT, /*  */
-    SPNG_STATE_IEND, /* Reached IEND */
-};
-
-enum spng__internal
-{
-    SPNG__IO_SIGNAL = 1 << 9,
-    SPNG__CTX_FLAGS_ALL = (SPNG_CTX_IGNORE_ADLER32 | SPNG_CTX_ENCODER)
-};
-
-#define SPNG_STR(x) _SPNG_STR(x)
-#define _SPNG_STR(x) #x
-
-#define SPNG_VERSION_STRING SPNG_STR(SPNG_VERSION_MAJOR) "." \
-                            SPNG_STR(SPNG_VERSION_MINOR) "." \
-                            SPNG_STR(SPNG_VERSION_PATCH)
-
-#define SPNG_GET_CHUNK_BOILERPLATE(chunk) \
-    if(ctx == NULL) return 1; \
-    int ret = read_chunks(ctx, 0); \
-    if(ret) return ret; \
-    if(!ctx->stored.chunk) return SPNG_ECHUNKAVAIL; \
-    if(chunk == NULL) return 1
-
-#define SPNG_SET_CHUNK_BOILERPLATE(chunk) \
-    if(ctx == NULL || chunk == NULL) return 1; \
-    if(ctx->data == NULL && !ctx->encode_only) return SPNG_ENOSRC; \
-    int ret = read_chunks(ctx, 0); \
-    if(ret) return ret
-
-/* Determine if the spng_option can be overriden/optimized */
-#define spng__optimize(option) (ctx->optimize_option & (1 << option))
-
-struct spng_subimage
-{
-    uint32_t width;
-    uint32_t height;
-    size_t out_width; /* byte width based on output format */
-    size_t scanline_width;
-};
-
-struct spng_text2
-{
-    int type;
-    char *keyword;
-    char *text;
-
-    size_t text_length;
-
-    uint8_t compression_flag; /* iTXt only */
-    char *language_tag; /* iTXt only */
-    char *translated_keyword; /* iTXt only */
-
-    size_t cache_usage;
-    char user_keyword_storage[80];
-};
-
-struct decode_flags
-{
-    unsigned apply_trns:  1;
-    unsigned apply_gamma: 1;
-    unsigned use_sbit:    1;
-    unsigned indexed:     1;
-    unsigned do_scaling:  1;
-    unsigned interlaced:  1;
-    unsigned same_layout: 1;
-    unsigned zerocopy:    1;
-    unsigned unpack:      1;
-};
-
-struct encode_flags
-{
-    unsigned interlace:      1;
-    unsigned same_layout:    1;
-    unsigned to_bigendian:   1;
-    unsigned progressive:    1;
-    unsigned finalize:       1;
-
-    enum spng_filter_choice filter_choice;
-};
-
-struct spng_chunk_bitfield
-{
-    unsigned ihdr: 1;
-    unsigned plte: 1;
-    unsigned chrm: 1;
-    unsigned iccp: 1;
-    unsigned gama: 1;
-    unsigned sbit: 1;
-    unsigned srgb: 1;
-    unsigned text: 1;
-    unsigned bkgd: 1;
-    unsigned hist: 1;
-    unsigned trns: 1;
-    unsigned phys: 1;
-    unsigned splt: 1;
-    unsigned time: 1;
-    unsigned offs: 1;
-    unsigned exif: 1;
-    unsigned unknown: 1;
-};
-
-/* Packed sample iterator */
-struct spng__iter
-{
-    const uint8_t mask;
-    unsigned shift_amount;
-    const unsigned initial_shift, bit_depth;
-    const unsigned char *samples;
-};
-
-union spng__decode_plte
-{
-    struct spng_plte_entry rgba[256];
-    unsigned char rgb[256 * 3];
-    unsigned char raw[256 * 4];
-    uint32_t align_this;
-};
-
-struct spng__zlib_options
-{
-    int compression_level;
-    int window_bits;
-    int mem_level;
-    int strategy;
-    int data_type;
-};
-
-typedef void spng__undo(spng_ctx *ctx);
-
-struct spng_ctx
-{
-    size_t data_size;
-    size_t bytes_read;
-    size_t stream_buf_size;
-    unsigned char *stream_buf;
-    const unsigned char *data;
-
-    /* User-defined pointers for streaming */
-    spng_read_fn *read_fn;
-    spng_write_fn *write_fn;
-    void *stream_user_ptr;
-
-    /* Used for buffer reads */
-    const unsigned char *png_base;
-    size_t bytes_left;
-    size_t last_read_size;
-
-    /* Used for encoding */
-    int user_owns_out_png;
-    unsigned char *out_png;
-    unsigned char *write_ptr;
-    size_t out_png_size;
-    size_t bytes_encoded;
-
-    /* These are updated by read/write_header()/read_chunk_bytes() */
-    struct spng_chunk current_chunk;
-    uint32_t cur_chunk_bytes_left;
-    uint32_t cur_actual_crc;
-
-    struct spng_alloc alloc;
-
-    enum spng_ctx_flags flags;
-    enum spng_format fmt;
-
-    enum spng_state state;
-
-    unsigned streaming: 1;
-    unsigned internal_buffer: 1; /* encoding to internal buffer */
-
-    unsigned inflate: 1;
-    unsigned deflate: 1;
-    unsigned encode_only: 1;
-    unsigned strict: 1;
-    unsigned discard: 1;
-    unsigned skip_crc: 1;
-    unsigned keep_unknown: 1;
-    unsigned prev_was_idat: 1;
-
-    struct spng__zlib_options image_options;
-    struct spng__zlib_options text_options;
-
-    spng__undo *undo;
-
-    /* input file contains this chunk */
-    struct spng_chunk_bitfield file;
-
-    /* chunk was stored with spng_set_*() */
-    struct spng_chunk_bitfield user;
-
-    /* chunk was stored by reading or with spng_set_*() */
-    struct spng_chunk_bitfield stored;
-
-    /* used to reset the above in case of an error */
-    struct spng_chunk_bitfield prev_stored;
-
-    struct spng_chunk first_idat, last_idat;
-
-    uint32_t max_width, max_height;
-
-    size_t max_chunk_size;
-    size_t chunk_cache_limit;
-    size_t chunk_cache_usage;
-    uint32_t chunk_count_limit;
-    uint32_t chunk_count_total;
-
-    int crc_action_critical;
-    int crc_action_ancillary;
-
-    uint32_t optimize_option;
-
-    struct spng_ihdr ihdr;
-
-    struct spng_plte plte;
-
-    struct spng_chrm_int chrm_int;
-    struct spng_iccp iccp;
-
-    uint32_t gama;
-
-    struct spng_sbit sbit;
-
-    uint8_t srgb_rendering_intent;
-
-    uint32_t n_text;
-    struct spng_text2 *text_list;
-
-    struct spng_bkgd bkgd;
-    struct spng_hist hist;
-    struct spng_trns trns;
-    struct spng_phys phys;
-
-    uint32_t n_splt;
-    struct spng_splt *splt_list;
-
-    struct spng_time time;
-    struct spng_offs offs;
-    struct spng_exif exif;
-
-    uint32_t n_chunks;
-    struct spng_unknown_chunk *chunk_list;
-
-    struct spng_subimage subimage[7];
-
-    z_stream zstream;
-    unsigned char *scanline_buf, *prev_scanline_buf, *row_buf, *filtered_scanline_buf;
-    unsigned char *scanline, *prev_scanline, *row, *filtered_scanline;
-
-    /* based on fmt */
-    size_t image_size; /* may be zero */
-    size_t image_width;
-
-    unsigned bytes_per_pixel; /* derived from ihdr */
-    unsigned pixel_size; /* derived from spng_format+ihdr */
-    int widest_pass;
-    int last_pass; /* last non-empty pass */
-
-    uint16_t *gamma_lut; /* points to either _lut8 or _lut16 */
-    uint16_t *gamma_lut16;
-    uint16_t gamma_lut8[256];
-    unsigned char trns_px[8];
-    union spng__decode_plte decode_plte;
-    struct spng_sbit decode_sb;
-    struct decode_flags decode_flags;
-    struct spng_row_info row_info;
-
-    struct encode_flags encode_flags;
-};
-
-static const uint32_t spng_u32max = INT32_MAX;
-
-static const uint32_t adam7_x_start[7] = { 0, 4, 0, 2, 0, 1, 0 };
-static const uint32_t adam7_y_start[7] = { 0, 0, 4, 0, 2, 0, 1 };
-static const uint32_t adam7_x_delta[7] = { 8, 8, 4, 4, 2, 2, 1 };
-static const uint32_t adam7_y_delta[7] = { 8, 8, 8, 4, 4, 2, 2 };
-
-static const uint8_t spng_signature[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };
-
-static const uint8_t type_ihdr[4] = { 73, 72, 68, 82 };
-static const uint8_t type_plte[4] = { 80, 76, 84, 69 };
-static const uint8_t type_idat[4] = { 73, 68, 65, 84 };
-static const uint8_t type_iend[4] = { 73, 69, 78, 68 };
-
-static const uint8_t type_trns[4] = { 116, 82, 78, 83 };
-static const uint8_t type_chrm[4] = { 99,  72, 82, 77 };
-static const uint8_t type_gama[4] = { 103, 65, 77, 65 };
-static const uint8_t type_iccp[4] = { 105, 67, 67, 80 };
-static const uint8_t type_sbit[4] = { 115, 66, 73, 84 };
-static const uint8_t type_srgb[4] = { 115, 82, 71, 66 };
-static const uint8_t type_text[4] = { 116, 69, 88, 116 };
-static const uint8_t type_ztxt[4] = { 122, 84, 88, 116 };
-static const uint8_t type_itxt[4] = { 105, 84, 88, 116 };
-static const uint8_t type_bkgd[4] = { 98,  75, 71, 68 };
-static const uint8_t type_hist[4] = { 104, 73, 83, 84 };
-static const uint8_t type_phys[4] = { 112, 72, 89, 115 };
-static const uint8_t type_splt[4] = { 115, 80, 76, 84 };
-static const uint8_t type_time[4] = { 116, 73, 77, 69 };
-
-static const uint8_t type_offs[4] = { 111, 70, 70, 115 };
-static const uint8_t type_exif[4] = { 101, 88, 73, 102 };
-
-static inline void *spng__malloc(spng_ctx *ctx,  size_t size)
-{
-    return ctx->alloc.malloc_fn(size);
-}
-
-static inline void *spng__calloc(spng_ctx *ctx, size_t nmemb, size_t size)
-{
-    return ctx->alloc.calloc_fn(nmemb, size);
-}
-
-static inline void *spng__realloc(spng_ctx *ctx, void *ptr, size_t size)
-{
-    return ctx->alloc.realloc_fn(ptr, size);
-}
-
-static inline void spng__free(spng_ctx *ctx, void *ptr)
-{
-    ctx->alloc.free_fn(ptr);
-}
-
-#if defined(SPNG_USE_MINIZ)
-static void *spng__zalloc(void *opaque, size_t items, size_t size)
-#else
-static void *spng__zalloc(void *opaque, uInt items, uInt size)
-#endif
-{
-    spng_ctx *ctx = opaque;
-
-    if(size > SIZE_MAX / items) return NULL;
-
-    size_t len = (size_t)items * size;
-
-    return spng__malloc(ctx, len);
-}
-
-static void spng__zfree(void *opqaue, void *ptr)
-{
-    spng_ctx *ctx = opqaue;
-    spng__free(ctx, ptr);
-}
-
-static inline uint16_t read_u16(const void *src)
-{
-    const unsigned char *data = src;
-
-    return (data[0] & 0xFFU) << 8 | (data[1] & 0xFFU);
-}
-
-static inline uint32_t read_u32(const void *src)
-{
-    const unsigned char *data = src;
-
-    return (data[0] & 0xFFUL) << 24 | (data[1] & 0xFFUL) << 16 |
-           (data[2] & 0xFFUL) << 8  | (data[3] & 0xFFUL);
-}
-
-static inline int32_t read_s32(const void *src)
-{
-    int32_t ret = (int32_t)read_u32(src);
-
-    return ret;
-}
-
-static inline void write_u16(void *dest, uint16_t x)
-{
-    unsigned char *data = dest;
-
-    data[0] = x >> 8;
-    data[1] = x & 0xFF;
-}
-
-static inline void write_u32(void *dest, uint32_t x)
-{
-    unsigned char *data = dest;
-
-    data[0] = (x >> 24);
-    data[1] = (x >> 16) & 0xFF;
-    data[2] = (x >> 8) & 0xFF;
-    data[3] = x & 0xFF;
-}
-
-static inline void write_s32(void *dest, int32_t x)
-{
-    uint32_t n = x;
-    write_u32(dest, n);
-}
-
-/* Returns an iterator for 1,2,4,8-bit samples */
-static struct spng__iter spng__iter_init(unsigned bit_depth, const unsigned char *samples)
-{
-    struct spng__iter iter =
-    {
-        .mask = (uint32_t)(1 << bit_depth) - 1,
-        .shift_amount = 8 - bit_depth,
-        .initial_shift = 8 - bit_depth,
-        .bit_depth = bit_depth,
-        .samples = samples
-    };
-
-    return iter;
-}
-
-/* Returns the current sample unpacked, iterates to the next one */
-static inline uint8_t get_sample(struct spng__iter *iter)
-{
-    uint8_t x = (iter->samples[0] >> iter->shift_amount) & iter->mask;
-
-    iter->shift_amount -= iter->bit_depth;
-
-    if(iter->shift_amount > 7)
-    {
-        iter->shift_amount = iter->initial_shift;
-        iter->samples++;
-    }
-
-    return x;
-}
-
-static void u16_row_to_host(void *row, size_t size)
-{
-    uint16_t *px = row;
-    size_t i, n = size / 2;
-
-    for(i=0; i < n; i++)
-    {
-        px[i] = read_u16(&px[i]);
-    }
-}
-
-static void u16_row_to_bigendian(void *row, size_t size)
-{
-    uint16_t *px = (uint16_t*)row;
-    size_t i, n = size / 2;
-
-    for(i=0; i < n; i++)
-    {
-        write_u16(&px[i], px[i]);
-    }
-}
-
-static void rgb8_row_to_rgba8(const unsigned char *row, unsigned char *out, uint32_t n)
-{
-    uint32_t i;
-    for(i=0; i < n; i++)
-    {
-        memcpy(out + i * 4, row + i * 3, 3);
-        out[i*4+3] = 255;
-    }
-}
-
-static unsigned num_channels(const struct spng_ihdr *ihdr)
-{
-    switch(ihdr->color_type)
-    {
-        case SPNG_COLOR_TYPE_TRUECOLOR: return 3;
-        case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA: return 2;
-        case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA: return 4;
-        case SPNG_COLOR_TYPE_GRAYSCALE:
-        case SPNG_COLOR_TYPE_INDEXED:
-            return 1;
-        default: return 0;
-    }
-}
-
-/* Calculate scanline width in bits, round up to the nearest byte */
-static int calculate_scanline_width(const struct spng_ihdr *ihdr, uint32_t width, size_t *scanline_width)
-{
-    if(ihdr == NULL || !width) return SPNG_EINTERNAL;
-
-    size_t res = num_channels(ihdr) * ihdr->bit_depth;
-
-    if(res > SIZE_MAX / width) return SPNG_EOVERFLOW;
-    res = res * width;
-
-    res += 15; /* Filter byte + 7 for rounding */
-
-    if(res < 15) return SPNG_EOVERFLOW;
-
-    res /= 8;
-
-    if(res > UINT32_MAX) return SPNG_EOVERFLOW;
-
-    *scanline_width = res;
-
-    return 0;
-}
-
-static int calculate_subimages(struct spng_ctx *ctx)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-
-    struct spng_ihdr *ihdr = &ctx->ihdr;
-    struct spng_subimage *sub = ctx->subimage;
-
-    if(ihdr->interlace_method == 1)
-    {
-        sub[0].width = (ihdr->width + 7) >> 3;
-        sub[0].height = (ihdr->height + 7) >> 3;
-        sub[1].width = (ihdr->width + 3) >> 3;
-        sub[1].height = (ihdr->height + 7) >> 3;
-        sub[2].width = (ihdr->width + 3) >> 2;
-        sub[2].height = (ihdr->height + 3) >> 3;
-        sub[3].width = (ihdr->width + 1) >> 2;
-        sub[3].height = (ihdr->height + 3) >> 2;
-        sub[4].width = (ihdr->width + 1) >> 1;
-        sub[4].height = (ihdr->height + 1) >> 2;
-        sub[5].width = ihdr->width >> 1;
-        sub[5].height = (ihdr->height + 1) >> 1;
-        sub[6].width = ihdr->width;
-        sub[6].height = ihdr->height >> 1;
-    }
-    else
-    {
-        sub[0].width = ihdr->width;
-        sub[0].height = ihdr->height;
-    }
-
-    int i;
-    for(i=0; i < 7; i++)
-    {
-        if(sub[i].width == 0 || sub[i].height == 0) continue;
-
-        int ret = calculate_scanline_width(ihdr, sub[i].width, &sub[i].scanline_width);
-        if(ret) return ret;
-
-        if(sub[ctx->widest_pass].scanline_width < sub[i].scanline_width) ctx->widest_pass = i;
-
-        ctx->last_pass = i;
-    }
-
-    return 0;
-}
-
-static int check_decode_fmt(const struct spng_ihdr *ihdr, const int fmt)
-{
-    switch(fmt)
-    {
-        case SPNG_FMT_RGBA8:
-        case SPNG_FMT_RGBA16:
-        case SPNG_FMT_RGB8:
-        case SPNG_FMT_PNG:
-        case SPNG_FMT_RAW:
-            return 0;
-        case SPNG_FMT_G8:
-        case SPNG_FMT_GA8:
-            if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8) return 0;
-            else return SPNG_EFMT;
-        case SPNG_FMT_GA16:
-            if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth == 16) return 0;
-            else return SPNG_EFMT;
-        default: return SPNG_EFMT;
-    }
-}
-
-static int calculate_image_width(const struct spng_ihdr *ihdr, int fmt, size_t *len)
-{
-    if(ihdr == NULL || len == NULL) return SPNG_EINTERNAL;
-
-    size_t res = ihdr->width;
-    unsigned bytes_per_pixel;
-
-    switch(fmt)
-    {
-        case SPNG_FMT_RGBA8:
-        case SPNG_FMT_GA16:
-            bytes_per_pixel = 4;
-            break;
-        case SPNG_FMT_RGBA16:
-            bytes_per_pixel = 8;
-            break;
-        case SPNG_FMT_RGB8:
-            bytes_per_pixel = 3;
-            break;
-        case SPNG_FMT_PNG:
-        case SPNG_FMT_RAW:
-        {
-            int ret = calculate_scanline_width(ihdr, ihdr->width, &res);
-            if(ret) return ret;
-
-            res -= 1; /* exclude filter byte */
-            bytes_per_pixel = 1;
-            break;
-        }
-        case SPNG_FMT_G8:
-            bytes_per_pixel = 1;
-            break;
-        case SPNG_FMT_GA8:
-            bytes_per_pixel = 2;
-            break;
-        default: return SPNG_EINTERNAL;
-    }
-
-    if(res > SIZE_MAX / bytes_per_pixel) return SPNG_EOVERFLOW;
-    res = res * bytes_per_pixel;
-
-    *len = res;
-
-    return 0;
-}
-
-static int calculate_image_size(const struct spng_ihdr *ihdr, int fmt, size_t *len)
-{
-    if(ihdr == NULL || len == NULL) return SPNG_EINTERNAL;
-
-    size_t res = 0;
-
-    int ret = calculate_image_width(ihdr, fmt, &res);
-    if(ret) return ret;
-
-    if(res > SIZE_MAX / ihdr->height) return SPNG_EOVERFLOW;
-    res = res * ihdr->height;
-
-    *len = res;
-
-    return 0;
-}
-
-static int increase_cache_usage(spng_ctx *ctx, size_t bytes, int new_chunk)
-{
-    if(ctx == NULL || !bytes) return SPNG_EINTERNAL;
-
-    if(new_chunk)
-    {
-        ctx->chunk_count_total++;
-        if(ctx->chunk_count_total < 1) return SPNG_EOVERFLOW;
-
-        if(ctx->chunk_count_total > ctx->chunk_count_limit) return SPNG_ECHUNK_LIMITS;
-    }
-
-    size_t new_usage = ctx->chunk_cache_usage + bytes;
-
-    if(new_usage < ctx->chunk_cache_usage) return SPNG_EOVERFLOW;
-
-    if(new_usage > ctx->chunk_cache_limit) return SPNG_ECHUNK_LIMITS;
-
-    ctx->chunk_cache_usage = new_usage;
-
-    return 0;
-}
-
-static int decrease_cache_usage(spng_ctx *ctx, size_t usage)
-{
-    if(ctx == NULL || !usage) return SPNG_EINTERNAL;
-    if(usage > ctx->chunk_cache_usage) return SPNG_EINTERNAL;
-
-    ctx->chunk_cache_usage -= usage;
-
-    return 0;
-}
-
-static int is_critical_chunk(struct spng_chunk *chunk)
-{
-    if(chunk == NULL) return 0;
-    if((chunk->type[0] & (1 << 5)) == 0) return 1;
-
-    return 0;
-}
-
-static int decode_err(spng_ctx *ctx, int err)
-{
-    ctx->state = SPNG_STATE_INVALID;
-
-    return err;
-}
-
-static int encode_err(spng_ctx *ctx, int err)
-{
-    ctx->state = SPNG_STATE_INVALID;
-
-    return err;
-}
-
-static inline int read_data(spng_ctx *ctx, size_t bytes)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-    if(!bytes) return 0;
-
-    if(ctx->streaming && (bytes > SPNG_READ_SIZE)) return SPNG_EINTERNAL;
-
-    int ret = ctx->read_fn(ctx, ctx->stream_user_ptr, ctx->stream_buf, bytes);
-
-    if(ret)
-    {
-        if(ret > 0 || ret < SPNG_IO_ERROR) ret = SPNG_IO_ERROR;
-
-        return ret;
-    }
-
-    ctx->bytes_read += bytes;
-    if(ctx->bytes_read < bytes) return SPNG_EOVERFLOW;
-
-    return 0;
-}
-
-/* Ensure there is enough space for encoding starting at ctx->write_ptr  */
-static int require_bytes(spng_ctx *ctx, size_t bytes)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-
-    if(ctx->streaming)
-    {
-        if(bytes > ctx->stream_buf_size)
-        {
-            size_t new_size = ctx->stream_buf_size;
-
-            /* Start at default IDAT size + header + crc */
-            if(new_size < (SPNG_WRITE_SIZE + 12)) new_size = SPNG_WRITE_SIZE + 12;
-
-            if(new_size < bytes) new_size = bytes;
-
-            void *temp = spng__realloc(ctx, ctx->stream_buf, new_size);
-
-            if(temp == NULL) return encode_err(ctx, SPNG_EMEM);
-
-            ctx->stream_buf = temp;
-            ctx->stream_buf_size = bytes;
-            ctx->write_ptr = ctx->stream_buf;
-        }
-
-        return 0;
-    }
-
-    if(!ctx->internal_buffer) return SPNG_ENODST;
-
-    size_t required = ctx->bytes_encoded + bytes;
-    if(required < bytes) return SPNG_EOVERFLOW;
-
-    if(required > ctx->out_png_size)
-    {
-        size_t new_size = ctx->out_png_size;
-
-        /* Start with a size that doesn't require a realloc() 100% of the time */
-        if(new_size < (SPNG_WRITE_SIZE * 2)) new_size = SPNG_WRITE_SIZE * 2;
-
-        /* Prefer the next power of two over the requested size */
-        while(new_size < required)
-        {
-            if(new_size / SIZE_MAX > 2) return encode_err(ctx, SPNG_EOVERFLOW);
-
-            new_size *= 2;
-        }
-
-        void *temp = spng__realloc(ctx, ctx->out_png, new_size);
-
-        if(temp == NULL) return encode_err(ctx, SPNG_EMEM);
-
-        ctx->out_png = temp;
-        ctx->out_png_size = new_size;
-        ctx->write_ptr = ctx->out_png + ctx->bytes_encoded;
-    }
-
-    return 0;
-}
-
-static int write_data(spng_ctx *ctx, const void *data, size_t bytes)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-    if(!bytes) return 0;
-
-    if(ctx->streaming)
-    {
-        if(bytes > SPNG_WRITE_SIZE) return SPNG_EINTERNAL;
-
-        int ret = ctx->write_fn(ctx, ctx->stream_user_ptr, (void*)data, bytes);
-
-        if(ret)
-        {
-            if(ret > 0 || ret < SPNG_IO_ERROR) ret = SPNG_IO_ERROR;
-
-            return encode_err(ctx, ret);
-        }
-    }
-    else
-    {
-        int ret = require_bytes(ctx, bytes);
-        if(ret) return encode_err(ctx, ret);
-
-        memcpy(ctx->write_ptr, data, bytes);
-
-        ctx->write_ptr += bytes;
-    }
-
-    ctx->bytes_encoded += bytes;
-    if(ctx->bytes_encoded < bytes) return SPNG_EOVERFLOW;
-
-    return 0;
-}
-
-static int write_header(spng_ctx *ctx, const uint8_t chunk_type[4], size_t chunk_length, unsigned char **data)
-{
-    if(ctx == NULL || chunk_type == NULL) return SPNG_EINTERNAL;
-    if(chunk_length > spng_u32max) return SPNG_EINTERNAL;
-
-    size_t total = chunk_length + 12;
-
-    int ret = require_bytes(ctx, total);
-    if(ret) return ret;
-
-    uint32_t crc = crc32(0, NULL, 0);
-    ctx->current_chunk.crc = crc32(crc, chunk_type, 4);
-
-    memcpy(&ctx->current_chunk.type, chunk_type, 4);
-    ctx->current_chunk.length = (uint32_t)chunk_length;
-
-    if(!data) return SPNG_EINTERNAL;
-
-    if(ctx->streaming) *data = ctx->stream_buf + 8;
-    else *data = ctx->write_ptr + 8;
-
-    return 0;
-}
-
-static int trim_chunk(spng_ctx *ctx, uint32_t length)
-{
-    if(length > spng_u32max) return SPNG_EINTERNAL;
-    if(length > ctx->current_chunk.length) return SPNG_EINTERNAL;
-
-    ctx->current_chunk.length = length;
-
-    return 0;
-}
-
-static int finish_chunk(spng_ctx *ctx)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-
-    struct spng_chunk *chunk = &ctx->current_chunk;
-
-    unsigned char *header;
-    unsigned char *chunk_data;
-
-    if(ctx->streaming)
-    {
-        chunk_data = ctx->stream_buf + 8;
-        header = ctx->stream_buf;
-    }
-    else
-    {
-        chunk_data = ctx->write_ptr + 8;
-        header = ctx->write_ptr;
-    }
-
-    write_u32(header, chunk->length);
-    memcpy(header + 4, chunk->type, 4);
-
-    chunk->crc = crc32(chunk->crc, chunk_data, chunk->length);
-
-    write_u32(chunk_data + chunk->length, chunk->crc);
-
-    if(ctx->streaming)
-    {
-        const unsigned char *ptr = ctx->stream_buf;
-        uint32_t bytes_left = chunk->length + 12;
-        uint32_t len = 0;
-
-        while(bytes_left)
-        {
-            ptr += len;
-            len = SPNG_WRITE_SIZE;
-
-            if(len > bytes_left) len = bytes_left;
-
-            int ret = write_data(ctx, ptr, len);
-            if(ret) return ret;
-
-            bytes_left -= len;
-        }
-    }
-    else
-    {
-        ctx->bytes_encoded += chunk->length;
-        if(ctx->bytes_encoded < chunk->length) return SPNG_EOVERFLOW;
-
-        ctx->bytes_encoded += 12;
-        if(ctx->bytes_encoded < 12) return SPNG_EOVERFLOW;
-
-        ctx->write_ptr += chunk->length + 12;
-    }
-
-    return 0;
-}
-
-static int write_chunk(spng_ctx *ctx, const uint8_t type[4], const void *data, size_t length)
-{
-    if(ctx == NULL || type == NULL) return SPNG_EINTERNAL;
-    if(length && data == NULL) return SPNG_EINTERNAL;
-
-    unsigned char *write_ptr;
-
-    int ret = write_header(ctx, type, length, &write_ptr);
-    if(ret) return ret;
-
-    if(length) memcpy(write_ptr, data, length);
-
-    return finish_chunk(ctx);
-}
-
-static int write_iend(spng_ctx *ctx)
-{
-    unsigned char iend_chunk[12] = { 0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130 };
-    return write_data(ctx, iend_chunk, 12);
-}
-
-static int write_unknown_chunks(spng_ctx *ctx, enum spng_location location)
-{
-    if(!ctx->stored.unknown) return 0;
-
-    const struct spng_unknown_chunk *chunk = ctx->chunk_list;
-
-    uint32_t i;
-    for(i=0; i < ctx->n_chunks; i++, chunk++)
-    {
-        if(chunk->location != location) continue;
-
-        int ret = write_chunk(ctx, chunk->type, chunk->data, chunk->length);
-        if(ret) return ret;
-    }
-
-    return 0;
-}
-
-/* Read and check the current chunk's crc,
-   returns -SPNG_CRC_DISCARD if the chunk should be discarded */
-static inline int read_and_check_crc(spng_ctx *ctx)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-
-    int ret;
-    ret = read_data(ctx, 4);
-    if(ret) return ret;
-
-    ctx->current_chunk.crc = read_u32(ctx->data);
-
-    if(ctx->skip_crc) return 0;
-
-    if(ctx->cur_actual_crc != ctx->current_chunk.crc)
-    {
-        if(is_critical_chunk(&ctx->current_chunk))
-        {
-            if(ctx->crc_action_critical == SPNG_CRC_USE) return 0;
-        }
-        else
-        {
-            if(ctx->crc_action_ancillary == SPNG_CRC_USE) return 0;
-            if(ctx->crc_action_ancillary == SPNG_CRC_DISCARD) return -SPNG_CRC_DISCARD;
-        }
-
-        return SPNG_ECHUNK_CRC;
-    }
-
-    return 0;
-}
-
-/* Read and validate the current chunk's crc and the next chunk header */
-static inline int read_header(spng_ctx *ctx)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-
-    int ret;
-    struct spng_chunk chunk = { 0 };
-
-    ret = read_and_check_crc(ctx);
-    if(ret)
-    {
-        if(ret == -SPNG_CRC_DISCARD)
-        {
-            ctx->discard = 1;
-        }
-        else return ret;
-    }
-
-    ret = read_data(ctx, 8);
-    if(ret) return ret;
-
-    chunk.offset = ctx->bytes_read - 8;
-
-    chunk.length = read_u32(ctx->data);
-
-    memcpy(&chunk.type, ctx->data + 4, 4);
-
-    if(chunk.length > spng_u32max) return SPNG_ECHUNK_STDLEN;
-
-    ctx->cur_chunk_bytes_left = chunk.length;
-
-    if(is_critical_chunk(&chunk) && ctx->crc_action_critical == SPNG_CRC_USE) ctx->skip_crc = 1;
-    else if(ctx->crc_action_ancillary == SPNG_CRC_USE) ctx->skip_crc = 1;
-    else ctx->skip_crc = 0;
-
-    if(!ctx->skip_crc)
-    {
-        ctx->cur_actual_crc = crc32(0, NULL, 0);
-        ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, chunk.type, 4);
-    }
-
-    ctx->current_chunk = chunk;
-
-    return 0;
-}
-
-/* Read chunk bytes and update crc */
-static int read_chunk_bytes(spng_ctx *ctx, uint32_t bytes)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-    if(!ctx->cur_chunk_bytes_left || !bytes) return SPNG_EINTERNAL;
-    if(bytes > ctx->cur_chunk_bytes_left) return SPNG_EINTERNAL; /* XXX: more specific error? */
-
-    int ret;
-
-    ret = read_data(ctx, bytes);
-    if(ret) return ret;
-
-    if(!ctx->skip_crc) ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, ctx->data, bytes);
-
-    ctx->cur_chunk_bytes_left -= bytes;
-
-    return ret;
-}
-
-/* read_chunk_bytes() + read_data() with custom output buffer */
-static int read_chunk_bytes2(spng_ctx *ctx, void *out, uint32_t bytes)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-    if(!ctx->cur_chunk_bytes_left || !bytes) return SPNG_EINTERNAL;
-    if(bytes > ctx->cur_chunk_bytes_left) return SPNG_EINTERNAL; /* XXX: more specific error? */
-
-    int ret;
-    uint32_t len = bytes;
-
-    if(ctx->streaming && len > SPNG_READ_SIZE) len = SPNG_READ_SIZE;
-
-    while(bytes)
-    {
-        if(len > bytes) len = bytes;
-
-        ret = ctx->read_fn(ctx, ctx->stream_user_ptr, out, len);
-        if(ret) return ret;
-
-        if(!ctx->streaming) memcpy(out, ctx->data, len);
-
-        ctx->bytes_read += len;
-        if(ctx->bytes_read < len) return SPNG_EOVERFLOW;
-
-        if(!ctx->skip_crc) ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, out, len);
-
-        ctx->cur_chunk_bytes_left -= len;
-
-        out = (char*)out + len;
-        bytes -= len;
-        len = SPNG_READ_SIZE;
-    }
-
-    return 0;
-}
-
-static int discard_chunk_bytes(spng_ctx *ctx, uint32_t bytes)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-    if(!bytes) return 0;
-
-    int ret;
-
-    if(ctx->streaming) /* Do small, consecutive reads */
-    {
-        while(bytes)
-        {
-            uint32_t len = SPNG_READ_SIZE;
-
-            if(len > bytes) len = bytes;
-
-            ret = read_chunk_bytes(ctx, len);
-            if(ret) return ret;
-
-            bytes -= len;
-        }
-    }
-    else
-    {
-        ret = read_chunk_bytes(ctx, bytes);
-        if(ret) return ret;
-    }
-
-    return 0;
-}
-
-static int spng__inflate_init(spng_ctx *ctx, int window_bits)
-{
-    if(ctx->zstream.state) inflateEnd(&ctx->zstream);
-
-    ctx->inflate = 1;
-
-    ctx->zstream.zalloc = spng__zalloc;
-    ctx->zstream.zfree = spng__zfree;
-    ctx->zstream.opaque = ctx;
-
-    if(inflateInit2(&ctx->zstream, window_bits) != Z_OK) return SPNG_EZLIB_INIT;
-
-#if ZLIB_VERNUM >= 0x1290 && !defined(SPNG_USE_MINIZ)
-
-    int validate = 1;
-
-    if(ctx->flags & SPNG_CTX_IGNORE_ADLER32) validate = 0;
-
-    if(is_critical_chunk(&ctx->current_chunk))
-    {
-        if(ctx->crc_action_critical == SPNG_CRC_USE) validate = 0;
-    }
-    else /* ancillary */
-    {
-        if(ctx->crc_action_ancillary == SPNG_CRC_USE) validate = 0;
-    }
-
-    if(inflateValidate(&ctx->zstream, validate)) return SPNG_EZLIB_INIT;
-
-#else /* This requires zlib >= 1.2.11 */
-    #pragma message ("inflateValidate() not available, SPNG_CTX_IGNORE_ADLER32 will be ignored")
-#endif
-
-    return 0;
-}
-
-static int spng__deflate_init(spng_ctx *ctx, struct spng__zlib_options *options)
-{
-    if(ctx->zstream.state) deflateEnd(&ctx->zstream);
-
-    ctx->deflate = 1;
-
-    z_stream *zstream = &ctx->zstream;
-    zstream->zalloc = spng__zalloc;
-    zstream->zfree = spng__zfree;
-    zstream->opaque = ctx;
-    zstream->data_type = options->data_type;
-
-    int ret = deflateInit2(zstream, options->compression_level, Z_DEFLATED, options->window_bits, options->mem_level, options->strategy);
-
-    if(ret != Z_OK) return SPNG_EZLIB_INIT;
-
-    return 0;
-}
-
-/* Inflate a zlib stream starting with start_buf if non-NULL,
-   continuing from the datastream till an end marker,
-   allocating and writing the inflated stream to *out,
-   leaving "extra" bytes at the end, final buffer length is *len.
-
-   Takes into account the chunk size and cache limits.
-*/
-static int spng__inflate_stream(spng_ctx *ctx, char **out, size_t *len, size_t extra, const void *start_buf, size_t start_len)
-{
-    int ret = spng__inflate_init(ctx, 15);
-    if(ret) return ret;
-
-    size_t max = ctx->chunk_cache_limit - ctx->chunk_cache_usage;
-
-    if(ctx->max_chunk_size < max) max = ctx->max_chunk_size;
-
-    if(extra > max) return SPNG_ECHUNK_LIMITS;
-    max -= extra;
-
-    uint32_t read_size;
-    size_t size = 8 * 1024;
-    void *t, *buf = spng__malloc(ctx, size);
-
-    if(buf == NULL) return SPNG_EMEM;
-
-    z_stream *stream = &ctx->zstream;
-
-    if(start_buf != NULL && start_len)
-    {
-        stream->avail_in = (uInt)start_len;
-        stream->next_in = start_buf;
-    }
-    else
-    {
-        stream->avail_in = 0;
-        stream->next_in = NULL;
-    }
-
-    stream->avail_out = (uInt)size;
-    stream->next_out = buf;
-
-    while(ret != Z_STREAM_END)
-    {
-        ret = inflate(stream, Z_NO_FLUSH);
-
-        if(ret == Z_STREAM_END) break;
-
-        if(ret != Z_OK && ret != Z_BUF_ERROR)
-        {
-            ret = SPNG_EZLIB;
-            goto err;
-        }
-
-        if(!stream->avail_out) /* Resize buffer */
-        {
-            /* overflow or reached chunk/cache limit */
-            if( (2 > SIZE_MAX / size) || (size > max / 2) )
-            {
-                ret = SPNG_ECHUNK_LIMITS;
-                goto err;
-            }
-
-            size *= 2;
-
-            t = spng__realloc(ctx, buf, size);
-            if(t == NULL) goto mem;
-
-            buf = t;
-
-            stream->avail_out = (uInt)size / 2;
-            stream->next_out = (unsigned char*)buf + size / 2;
-        }
-        else if(!stream->avail_in) /* Read more chunk bytes */
-        {
-            read_size = ctx->cur_chunk_bytes_left;
-            if(ctx->streaming && read_size > SPNG_READ_SIZE) read_size = SPNG_READ_SIZE;
-
-            ret = read_chunk_bytes(ctx, read_size);
-
-            if(ret)
-            {
-                if(!read_size) ret = SPNG_EZLIB;
-
-                goto err;
-            }
-
-            stream->avail_in = read_size;
-            stream->next_in = ctx->data;
-        }
-    }
-
-    size = stream->total_out;
-
-    if(!size)
-    {
-        ret = SPNG_EZLIB;
-        goto err;
-    }
-
-    size += extra;
-    if(size < extra) goto mem;
-
-    t = spng__realloc(ctx, buf, size);
-    if(t == NULL) goto mem;
-
-    buf = t;
-
-    (void)increase_cache_usage(ctx, size, 0);
-
-    *out = buf;
-    *len = size;
-
-    return 0;
-
-mem:
-    ret = SPNG_EMEM;
-err:
-    spng__free(ctx, buf);
-    return ret;
-}
-
-/* Read at least one byte from the IDAT stream */
-static int read_idat_bytes(spng_ctx *ctx, uint32_t *bytes_read)
-{
-    if(ctx == NULL || bytes_read == NULL) return SPNG_EINTERNAL;
-    if(memcmp(ctx->current_chunk.type, type_idat, 4)) return SPNG_EIDAT_TOO_SHORT;
-
-    int ret;
-    uint32_t len;
-
-    while(!ctx->cur_chunk_bytes_left)
-    {
-        ret = read_header(ctx);
-        if(ret) return ret;
-
-        if(memcmp(ctx->current_chunk.type, type_idat, 4)) return SPNG_EIDAT_TOO_SHORT;
-    }
-
-    if(ctx->streaming)
-    {/* TODO: estimate bytes to read for progressive reads */
-        len = SPNG_READ_SIZE;
-        if(len > ctx->cur_chunk_bytes_left) len = ctx->cur_chunk_bytes_left;
-    }
-    else len = ctx->current_chunk.length;
-
-    ret = read_chunk_bytes(ctx, len);
-
-    *bytes_read = len;
-
-    return ret;
-}
-
-static int read_scanline_bytes(spng_ctx *ctx, unsigned char *dest, size_t len)
-{
-    if(ctx == NULL || dest == NULL) return SPNG_EINTERNAL;
-
-    int ret = Z_OK;
-    uint32_t bytes_read;
-
-    z_stream *zstream = &ctx->zstream;
-
-    zstream->avail_out = (uInt)len;
-    zstream->next_out = dest;
-
-    while(zstream->avail_out != 0)
-    {
-        ret = inflate(zstream, Z_NO_FLUSH);
-
-        if(ret == Z_OK) continue;
-
-        if(ret == Z_STREAM_END) /* Reached an end-marker */
-        {
-            if(zstream->avail_out != 0) return SPNG_EIDAT_TOO_SHORT;
-        }
-        else if(ret == Z_BUF_ERROR) /* Read more IDAT bytes */
-        {
-            ret = read_idat_bytes(ctx, &bytes_read);
-            if(ret) return ret;
-
-            zstream->avail_in = bytes_read;
-            zstream->next_in = ctx->data;
-        }
-        else return SPNG_EIDAT_STREAM;
-    }
-
-    return 0;
-}
-
-static uint8_t paeth(uint8_t a, uint8_t b, uint8_t c)
-{
-    int16_t p = a + b - c;
-    int16_t pa = abs(p - a);
-    int16_t pb = abs(p - b);
-    int16_t pc = abs(p - c);
-
-    if(pa <= pb && pa <= pc) return a;
-    else if(pb <= pc) return b;
-
-    return c;
-}
-
-SPNG_TARGET_CLONES("default,avx2")
-static void defilter_up(size_t bytes, unsigned char *row, const unsigned char *prev)
-{
-    size_t i;
-    for(i=0; i < bytes; i++)
-    {
-        row[i] += prev[i];
-    }
-}
-
-/* Defilter *scanline in-place.
-   *prev_scanline and *scanline should point to the first pixel,
-   scanline_width is the width of the scanline including the filter byte.
-*/
-static int defilter_scanline(const unsigned char *prev_scanline, unsigned char *scanline,
-                             size_t scanline_width, unsigned bytes_per_pixel, unsigned filter)
-{
-    if(prev_scanline == NULL || scanline == NULL || !scanline_width) return SPNG_EINTERNAL;
-
-    size_t i;
-    scanline_width--;
-
-    if(filter == 0) return 0;
-
-#ifndef SPNG_DISABLE_OPT
-    if(filter == SPNG_FILTER_UP) goto no_opt;
-
-    if(bytes_per_pixel == 4)
-    {
-        if(filter == SPNG_FILTER_SUB)
-            defilter_sub4(scanline_width, scanline);
-        else if(filter == SPNG_FILTER_AVERAGE)
-            defilter_avg4(scanline_width, scanline, prev_scanline);
-        else if(filter == SPNG_FILTER_PAETH)
-            defilter_paeth4(scanline_width, scanline, prev_scanline);
-        else return SPNG_EFILTER;
-
-        return 0;
-    }
-    else if(bytes_per_pixel == 3)
-    {
-        if(filter == SPNG_FILTER_SUB)
-            defilter_sub3(scanline_width, scanline);
-        else if(filter == SPNG_FILTER_AVERAGE)
-            defilter_avg3(scanline_width, scanline, prev_scanline);
-        else if(filter == SPNG_FILTER_PAETH)
-            defilter_paeth3(scanline_width, scanline, prev_scanline);
-        else return SPNG_EFILTER;
-
-        return 0;
-    }
-no_opt:
-#endif
-
-    if(filter == SPNG_FILTER_UP)
-    {
-        defilter_up(scanline_width, scanline, prev_scanline);
-        return 0;
-    }
-
-    for(i=0; i < scanline_width; i++)
-    {
-        uint8_t x, a, b, c;
-
-        if(i >= bytes_per_pixel)
-        {
-            a = scanline[i - bytes_per_pixel];
-            b = prev_scanline[i];
-            c = prev_scanline[i - bytes_per_pixel];
-        }
-        else /* First pixel in row */
-        {
-            a = 0;
-            b = prev_scanline[i];
-            c = 0;
-        }
-
-        x = scanline[i];
-
-        switch(filter)
-        {
-            case SPNG_FILTER_SUB:
-            {
-                x = x + a;
-                break;
-            }
-            case SPNG_FILTER_AVERAGE:
-            {
-                uint16_t avg = (a + b) / 2;
-                x = x + avg;
-                break;
-            }
-            case SPNG_FILTER_PAETH:
-            {
-                x = x + paeth(a,b,c);
-                break;
-            }
-        }
-
-        scanline[i] = x;
-    }
-
-    return 0;
-}
-
-static int filter_scanline(unsigned char *filtered, const unsigned char *prev_scanline, const unsigned char *scanline,
-                           size_t scanline_width, unsigned bytes_per_pixel, const unsigned filter)
-{
-    if(prev_scanline == NULL || scanline == NULL || scanline_width <= 1) return SPNG_EINTERNAL;
-
-    if(filter > 4) return SPNG_EFILTER;
-    if(filter == 0) return 0;
-
-    scanline_width--;
-
-    uint32_t i;
-    for(i=0; i < scanline_width; i++)
-    {
-        uint8_t x, a, b, c;
-
-        if(i >= bytes_per_pixel)
-        {
-            a = scanline[i - bytes_per_pixel];
-            b = prev_scanline[i];
-            c = prev_scanline[i - bytes_per_pixel];
-        }
-        else /* first pixel in row */
-        {
-            a = 0;
-            b = prev_scanline[i];
-            c = 0;
-        }
-
-        x = scanline[i];
-
-        switch(filter)
-        {
-            case SPNG_FILTER_SUB:
-            {
-                x = x - a;
-                break;
-            }
-            case SPNG_FILTER_UP:
-            {
-                x = x - b;
-                break;
-            }
-            case SPNG_FILTER_AVERAGE:
-            {
-                uint16_t avg = (a + b) / 2;
-                x = x - avg;
-                break;
-            }
-            case SPNG_FILTER_PAETH:
-            {
-                x = x - paeth(a,b,c);
-                break;
-            }
-        }
-
-        filtered[i] = x;
-    }
-
-    return 0;
-}
-
-static int32_t filter_sum(const unsigned char *prev_scanline, const unsigned char *scanline,
-                          size_t size, unsigned bytes_per_pixel, const unsigned filter)
-{
-    /* prevent potential over/underflow, bails out at a width of ~8M pixels for RGBA8 */
-    if(size > (INT32_MAX / 128)) return INT32_MAX;
-
-    uint32_t i;
-    int32_t sum = 0;
-    uint8_t x, a, b, c;
-
-    for(i=0; i < size; i++)
-    {
-        if(i >= bytes_per_pixel)
-        {
-            a = scanline[i - bytes_per_pixel];
-            b = prev_scanline[i];
-            c = prev_scanline[i - bytes_per_pixel];
-        }
-        else /* first pixel in row */
-        {
-            a = 0;
-            b = prev_scanline[i];
-            c = 0;
-        }
-
-        x = scanline[i];
-
-        switch(filter)
-        {
-            case SPNG_FILTER_NONE:
-            {
-                break;
-            }
-            case SPNG_FILTER_SUB:
-            {
-                x = x - a;
-                break;
-            }
-            case SPNG_FILTER_UP:
-            {
-                x = x - b;
-                break;
-            }
-            case SPNG_FILTER_AVERAGE:
-            {
-                uint16_t avg = (a + b) / 2;
-                x = x - avg;
-                break;
-            }
-            case SPNG_FILTER_PAETH:
-            {
-                x = x - paeth(a,b,c);
-                break;
-            }
-        }
-
-        sum += 128 - abs((int)x - 128);
-    }
-
-    return sum;
-}
-
-static unsigned get_best_filter(const unsigned char *prev_scanline, const unsigned char *scanline,
-                                size_t scanline_width, unsigned bytes_per_pixel, const int choices)
-{
-    if(!choices) return SPNG_FILTER_NONE;
-
-    scanline_width--;
-
-    int i;
-    unsigned int best_filter = 0;
-    enum spng_filter_choice flag;
-    int32_t sum, best_score = INT32_MAX;
-    int32_t filter_scores[5] = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX };
-
-    if( !(choices & (choices - 1)) )
-    {/* only one choice/bit is set */
-        for(i=0; i < 5; i++)
-        {
-            if(choices == 1 << (i + 3)) return i;
-        }
-    }
-
-    for(i=0; i < 5; i++)
-    {
-        flag = 1 << (i + 3);
-
-        if(choices & flag) sum = filter_sum(prev_scanline, scanline, scanline_width, bytes_per_pixel, i);
-        else continue;
-
-        filter_scores[i] = abs(sum);
-
-        if(filter_scores[i] < best_score)
-        {
-            best_score = filter_scores[i];
-            best_filter = i;
-        }
-    }
-
-    return best_filter;
-}
-
-/* Scale "sbits" significant bits in "sample" from "bit_depth" to "target"
-
-   "bit_depth" must be a valid PNG depth
-   "sbits" must be less than or equal to "bit_depth"
-   "target" must be between 1 and 16
-*/
-static uint16_t sample_to_target(uint16_t sample, unsigned bit_depth, unsigned sbits, unsigned target)
-{
-    if(bit_depth == sbits)
-    {
-        if(target == sbits) return sample; /* No scaling */
-    }/* bit_depth > sbits */
-    else sample = sample >> (bit_depth - sbits); /* Shift significant bits to bottom */
-
-    /* Downscale */
-    if(target < sbits) return sample >> (sbits - target);
-
-    /* Upscale using left bit replication */
-    int8_t shift_amount = target - sbits;
-    uint16_t sample_bits = sample;
-    sample = 0;
-
-    while(shift_amount >= 0)
-    {
-        sample = sample | (sample_bits << shift_amount);
-        shift_amount -= sbits;
-    }
-
-    int8_t partial = shift_amount + (int8_t)sbits;
-
-    if(partial != 0) sample = sample | (sample_bits >> abs(shift_amount));
-
-    return sample;
-}
-
-static inline void gamma_correct_row(unsigned char *row, uint32_t pixels, int fmt, const uint16_t *gamma_lut)
-{
-    uint32_t i;
-
-    if(fmt == SPNG_FMT_RGBA8)
-    {
-        unsigned char *px;
-        for(i=0; i < pixels; i++)
-        {
-            px = row + i * 4;
-
-            px[0] = gamma_lut[px[0]];
-            px[1] = gamma_lut[px[1]];
-            px[2] = gamma_lut[px[2]];
-        }
-    }
-    else if(fmt == SPNG_FMT_RGBA16)
-    {
-        for(i=0; i < pixels; i++)
-        {
-            uint16_t px[4];
-            memcpy(px, row + i * 8, 8);
-
-            px[0] = gamma_lut[px[0]];
-            px[1] = gamma_lut[px[1]];
-            px[2] = gamma_lut[px[2]];
-
-            memcpy(row + i * 8, px, 8);
-        }
-    }
-    else if(fmt == SPNG_FMT_RGB8)
-    {
-        unsigned char *px;
-        for(i=0; i < pixels; i++)
-        {
-            px = row + i * 3;
-
-            px[0] = gamma_lut[px[0]];
-            px[1] = gamma_lut[px[1]];
-            px[2] = gamma_lut[px[2]];
-        }
-    }
-}
-
-/* Apply transparency to output row */
-static inline void trns_row(unsigned char *row,
-                            const unsigned char *scanline,
-                            const unsigned char *trns,
-                            unsigned scanline_stride,
-                            struct spng_ihdr *ihdr,
-                            uint32_t pixels,
-                            int fmt)
-{
-    uint32_t i;
-    unsigned row_stride;
-    unsigned depth = ihdr->bit_depth;
-
-    if(fmt == SPNG_FMT_RGBA8)
-    {
-        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) return; /* already applied in the decoding loop */
-
-        row_stride = 4;
-        for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride)
-        {
-            if(!memcmp(scanline, trns, scanline_stride)) row[3] = 0;
-        }
-    }
-    else if(fmt == SPNG_FMT_RGBA16)
-    {
-        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) return; /* already applied in the decoding loop */
-
-        row_stride = 8;
-        for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride)
-        {
-            if(!memcmp(scanline, trns, scanline_stride)) memset(row + 6, 0, 2);
-        }
-    }
-    else if(fmt == SPNG_FMT_GA8)
-    {
-        row_stride = 2;
-
-        if(depth == 16)
-        {
-            for(i=0; i < pixels; i++, scanline+=scanline_stride, row+=row_stride)
-            {
-                if(!memcmp(scanline, trns, scanline_stride)) memset(row + 1, 0, 1);
-            }
-        }
-        else /* depth <= 8 */
-        {
-            struct spng__iter iter = spng__iter_init(depth, scanline);
-
-            for(i=0; i < pixels; i++, row+=row_stride)
-            {
-                if(trns[0] == get_sample(&iter)) row[1] = 0;
-            }
-        }
-    }
-    else if(fmt == SPNG_FMT_GA16)
-    {
-        row_stride = 4;
-
-        if(depth == 16)
-        {
-            for(i=0; i< pixels; i++, scanline+=scanline_stride, row+=row_stride)
-            {
-                if(!memcmp(scanline, trns, 2)) memset(row + 2, 0, 2);
-            }
-        }
-        else
-        {
-            struct spng__iter iter = spng__iter_init(depth, scanline);
-
-            for(i=0; i< pixels; i++, row+=row_stride)
-            {
-                if(trns[0] == get_sample(&iter)) memset(row + 2, 0, 2);
-            }
-        }
-    }
-    else return;
-}
-
-static inline void scale_row(unsigned char *row, uint32_t pixels, int fmt, unsigned depth, const struct spng_sbit *sbit)
-{
-    uint32_t i;
-
-    if(fmt == SPNG_FMT_RGBA8)
-    {
-        unsigned char px[4];
-        for(i=0; i < pixels; i++)
-        {
-            memcpy(px, row + i * 4, 4);
-
-            px[0] = sample_to_target(px[0], depth, sbit->red_bits, 8);
-            px[1] = sample_to_target(px[1], depth, sbit->green_bits, 8);
-            px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 8);
-            px[3] = sample_to_target(px[3], depth, sbit->alpha_bits, 8);
-
-            memcpy(row + i * 4, px, 4);
-        }
-    }
-    else if(fmt == SPNG_FMT_RGBA16)
-    {
-        uint16_t px[4];
-        for(i=0; i < pixels; i++)
-        {
-            memcpy(px, row + i * 8, 8);
-
-            px[0] = sample_to_target(px[0], depth, sbit->red_bits, 16);
-            px[1] = sample_to_target(px[1], depth, sbit->green_bits, 16);
-            px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 16);
-            px[3] = sample_to_target(px[3], depth, sbit->alpha_bits, 16);
-
-            memcpy(row + i * 8, px, 8);
-        }
-    }
-    else if(fmt == SPNG_FMT_RGB8)
-    {
-        unsigned char px[4];
-        for(i=0; i < pixels; i++)
-        {
-            memcpy(px, row + i * 3, 3);
-
-            px[0] = sample_to_target(px[0], depth, sbit->red_bits, 8);
-            px[1] = sample_to_target(px[1], depth, sbit->green_bits, 8);
-            px[2] = sample_to_target(px[2], depth, sbit->blue_bits, 8);
-
-            memcpy(row + i * 3, px, 3);
-        }
-    }
-    else if(fmt == SPNG_FMT_G8)
-    {
-        for(i=0; i < pixels; i++)
-        {
-            row[i] = sample_to_target(row[i], depth, sbit->grayscale_bits, 8);
-        }
-    }
-    else if(fmt == SPNG_FMT_GA8)
-    {
-        for(i=0; i < pixels; i++)
-        {
-            row[i*2] = sample_to_target(row[i*2], depth, sbit->grayscale_bits, 8);
-        }
-    }
-}
-
-/* Expand to *row using 8-bit palette indices from *scanline */
-static void expand_row(unsigned char *row,
-                       const unsigned char *scanline,
-                       const union spng__decode_plte *decode_plte,
-                       uint32_t width,
-                       int fmt)
-{
-    uint32_t i = 0;
-    unsigned char *px;
-    unsigned char entry;
-    const struct spng_plte_entry *plte = decode_plte->rgba;
-
-#if defined(SPNG_ARM)
-    if(fmt == SPNG_FMT_RGBA8) i = expand_palette_rgba8_neon(row, scanline, decode_plte->raw, width);
-    else if(fmt == SPNG_FMT_RGB8)
-    {
-        i = expand_palette_rgb8_neon(row, scanline, decode_plte->raw, width);
-
-        for(; i < width; i++)
-        {/* In this case the LUT is 3 bytes packed */
-            px = row + i * 3;
-            entry = scanline[i];
-            px[0] = decode_plte->raw[entry * 3 + 0];
-            px[1] = decode_plte->raw[entry * 3 + 1];
-            px[2] = decode_plte->raw[entry * 3 + 2];
-        }
-        return;
-    }
-#endif
-
-    if(fmt == SPNG_FMT_RGBA8)
-    {
-        for(; i < width; i++)
-        {
-            px = row + i * 4;
-            entry = scanline[i];
-            px[0] = plte[entry].red;
-            px[1] = plte[entry].green;
-            px[2] = plte[entry].blue;
-            px[3] = plte[entry].alpha;
-        }
-    }
-    else if(fmt == SPNG_FMT_RGB8)
-    {
-        for(; i < width; i++)
-        {
-            px = row + i * 3;
-            entry = scanline[i];
-            px[0] = plte[entry].red;
-            px[1] = plte[entry].green;
-            px[2] = plte[entry].blue;
-        }
-    }
-}
-
-/* Unpack 1/2/4/8-bit samples to G8/GA8/GA16 or G16 -> GA16 */
-static void unpack_scanline(unsigned char *out, const unsigned char *scanline, uint32_t width, unsigned bit_depth, int fmt)
-{
-    struct spng__iter iter = spng__iter_init(bit_depth, scanline);
-    uint32_t i;
-    uint16_t sample, alpha = 65535;
-
-
-    if(fmt == SPNG_FMT_GA8) goto ga8;
-    else if(fmt == SPNG_FMT_GA16) goto ga16;
-
-    /* 1/2/4-bit -> 8-bit */
-    for(i=0; i < width; i++) out[i] = get_sample(&iter);
-
-    return;
-
-ga8:
-    /* 1/2/4/8-bit -> GA8 */
-    for(i=0; i < width; i++)
-    {
-        out[i*2] = get_sample(&iter);
-        out[i*2 + 1] = 255;
-    }
-
-    return;
-
-ga16:
-
-    /* 16 -> GA16 */
-    if(bit_depth == 16)
-    {
-        for(i=0; i < width; i++)
-        {
-            memcpy(out + i * 4, scanline + i * 2, 2);
-            memcpy(out + i * 4 + 2, &alpha, 2);
-        }
-        return;
-    }
-
-     /* 1/2/4/8-bit -> GA16 */
-    for(i=0; i < width; i++)
-    {
-        sample = get_sample(&iter);
-        memcpy(out + i * 4, &sample, 2);
-        memcpy(out + i * 4 + 2, &alpha, 2);
-    }
-}
-
-static int check_ihdr(const struct spng_ihdr *ihdr, uint32_t max_width, uint32_t max_height)
-{
-    if(ihdr->width > spng_u32max || !ihdr->width) return SPNG_EWIDTH;
-    if(ihdr->height > spng_u32max || !ihdr->height) return SPNG_EHEIGHT;
-
-    if(ihdr->width > max_width) return SPNG_EUSER_WIDTH;
-    if(ihdr->height > max_height) return SPNG_EUSER_HEIGHT;
-
-    switch(ihdr->color_type)
-    {
-        case SPNG_COLOR_TYPE_GRAYSCALE:
-        {
-            if( !(ihdr->bit_depth == 1 || ihdr->bit_depth == 2 ||
-                  ihdr->bit_depth == 4 || ihdr->bit_depth == 8 ||
-                  ihdr->bit_depth == 16) )
-                  return SPNG_EBIT_DEPTH;
-
-            break;
-        }
-        case SPNG_COLOR_TYPE_TRUECOLOR:
-        case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA:
-        case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA:
-        {
-            if( !(ihdr->bit_depth == 8 || ihdr->bit_depth == 16) )
-                return SPNG_EBIT_DEPTH;
-
-            break;
-        }
-        case SPNG_COLOR_TYPE_INDEXED:
-        {
-            if( !(ihdr->bit_depth == 1 || ihdr->bit_depth == 2 ||
-                  ihdr->bit_depth == 4 || ihdr->bit_depth == 8) )
-                return SPNG_EBIT_DEPTH;
-
-            break;
-        }
-        default: return SPNG_ECOLOR_TYPE;
-    }
-
-    if(ihdr->compression_method) return SPNG_ECOMPRESSION_METHOD;
-    if(ihdr->filter_method) return SPNG_EFILTER_METHOD;
-
-    if(ihdr->interlace_method > 1) return SPNG_EINTERLACE_METHOD;
-
-    return 0;
-}
-
-static int check_plte(const struct spng_plte *plte, const struct spng_ihdr *ihdr)
-{
-    if(plte == NULL || ihdr == NULL) return 1;
-
-    if(plte->n_entries == 0) return 1;
-    if(plte->n_entries > 256) return 1;
-
-    if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED)
-    {
-        if(plte->n_entries > (1U << ihdr->bit_depth)) return 1;
-    }
-
-    return 0;
-}
-
-static int check_sbit(const struct spng_sbit *sbit, const struct spng_ihdr *ihdr)
-{
-    if(sbit == NULL || ihdr == NULL) return 1;
-
-    if(ihdr->color_type == 0)
-    {
-        if(sbit->grayscale_bits == 0) return SPNG_ESBIT;
-        if(sbit->grayscale_bits > ihdr->bit_depth) return SPNG_ESBIT;
-    }
-    else if(ihdr->color_type == 2 || ihdr->color_type == 3)
-    {
-        if(sbit->red_bits == 0) return SPNG_ESBIT;
-        if(sbit->green_bits == 0) return SPNG_ESBIT;
-        if(sbit->blue_bits == 0) return SPNG_ESBIT;
-
-        uint8_t bit_depth;
-        if(ihdr->color_type == 3) bit_depth = 8;
-        else bit_depth = ihdr->bit_depth;
-
-        if(sbit->red_bits > bit_depth) return SPNG_ESBIT;
-        if(sbit->green_bits > bit_depth) return SPNG_ESBIT;
-        if(sbit->blue_bits > bit_depth) return SPNG_ESBIT;
-    }
-    else if(ihdr->color_type == 4)
-    {
-        if(sbit->grayscale_bits == 0) return SPNG_ESBIT;
-        if(sbit->alpha_bits == 0) return SPNG_ESBIT;
-
-        if(sbit->grayscale_bits > ihdr->bit_depth) return SPNG_ESBIT;
-        if(sbit->alpha_bits > ihdr->bit_depth) return SPNG_ESBIT;
-    }
-    else if(ihdr->color_type == 6)
-    {
-        if(sbit->red_bits == 0) return SPNG_ESBIT;
-        if(sbit->green_bits == 0) return SPNG_ESBIT;
-        if(sbit->blue_bits == 0) return SPNG_ESBIT;
-        if(sbit->alpha_bits == 0) return SPNG_ESBIT;
-
-        if(sbit->red_bits > ihdr->bit_depth) return SPNG_ESBIT;
-        if(sbit->green_bits > ihdr->bit_depth) return SPNG_ESBIT;
-        if(sbit->blue_bits > ihdr->bit_depth) return SPNG_ESBIT;
-        if(sbit->alpha_bits > ihdr->bit_depth) return SPNG_ESBIT;
-    }
-
-    return 0;
-}
-
-static int check_chrm_int(const struct spng_chrm_int *chrm_int)
-{
-    if(chrm_int == NULL) return 1;
-
-    if(chrm_int->white_point_x > spng_u32max ||
-       chrm_int->white_point_y > spng_u32max ||
-       chrm_int->red_x > spng_u32max ||
-       chrm_int->red_y > spng_u32max ||
-       chrm_int->green_x  > spng_u32max ||
-       chrm_int->green_y  > spng_u32max ||
-       chrm_int->blue_x > spng_u32max ||
-       chrm_int->blue_y > spng_u32max) return SPNG_ECHRM;
-
-    return 0;
-}
-
-static int check_phys(const struct spng_phys *phys)
-{
-    if(phys == NULL) return 1;
-
-    if(phys->unit_specifier > 1) return SPNG_EPHYS;
-
-    if(phys->ppu_x > spng_u32max) return SPNG_EPHYS;
-    if(phys->ppu_y > spng_u32max) return SPNG_EPHYS;
-
-    return 0;
-}
-
-static int check_time(const struct spng_time *time)
-{
-    if(time == NULL) return 1;
-
-    if(time->month == 0 || time->month > 12) return 1;
-    if(time->day == 0 || time->day > 31) return 1;
-    if(time->hour > 23) return 1;
-    if(time->minute > 59) return 1;
-    if(time->second > 60) return 1;
-
-    return 0;
-}
-
-static int check_offs(const struct spng_offs *offs)
-{
-    if(offs == NULL) return 1;
-
-    if(offs->unit_specifier > 1) return 1;
-
-    return 0;
-}
-
-static int check_exif(const struct spng_exif *exif)
-{
-    if(exif == NULL) return 1;
-    if(exif->data == NULL) return 1;
-
-    if(exif->length < 4) return SPNG_ECHUNK_SIZE;
-    if(exif->length > spng_u32max) return SPNG_ECHUNK_STDLEN;
-
-    const uint8_t exif_le[4] = { 73, 73, 42, 0 };
-    const uint8_t exif_be[4] = { 77, 77, 0, 42 };
-
-    if(memcmp(exif->data, exif_le, 4) && memcmp(exif->data, exif_be, 4)) return 1;
-
-    return 0;
-}
-
-/* Validate PNG keyword */
-static int check_png_keyword(const char *str)
-{
-    if(str == NULL) return 1;
-    size_t len = strlen(str);
-    const char *end = str + len;
-
-    if(!len) return 1;
-    if(len > 79) return 1;
-    if(str[0] == ' ') return 1; /* Leading space */
-    if(end[-1] == ' ') return 1; /* Trailing space */
-    if(strstr(str, "  ") != NULL) return 1; /* Consecutive spaces */
-
-    uint8_t c;
-    while(str != end)
-    {
-        memcpy(&c, str, 1);
-
-        if( (c >= 32 && c <= 126) || (c >= 161) ) str++;
-        else return 1; /* Invalid character */
-    }
-
-    return 0;
-}
-
-/* Validate PNG text *str up to 'len' bytes */
-static int check_png_text(const char *str, size_t len)
-{/* XXX: are consecutive newlines permitted? */
-    if(str == NULL || len == 0) return 1;
-
-    uint8_t c;
-    size_t i = 0;
-    while(i < len)
-    {
-        memcpy(&c, str + i, 1);
-
-        if( (c >= 32 && c <= 126) || (c >= 161) || c == 10) i++;
-        else return 1; /* Invalid character */
-    }
-
-    return 0;
-}
-
-/* Returns non-zero for standard chunks which are stored without allocating memory */
-static int is_small_chunk(uint8_t type[4])
-{
-    if(!memcmp(type, type_plte, 4)) return 1;
-    else if(!memcmp(type, type_chrm, 4)) return 1;
-    else if(!memcmp(type, type_gama, 4)) return 1;
-    else if(!memcmp(type, type_sbit, 4)) return 1;
-    else if(!memcmp(type, type_srgb, 4)) return 1;
-    else if(!memcmp(type, type_bkgd, 4)) return 1;
-    else if(!memcmp(type, type_trns, 4)) return 1;
-    else if(!memcmp(type, type_hist, 4)) return 1;
-    else if(!memcmp(type, type_phys, 4)) return 1;
-    else if(!memcmp(type, type_time, 4)) return 1;
-    else if(!memcmp(type, type_offs, 4)) return 1;
-    else return 0;
-}
-
-static int read_ihdr(spng_ctx *ctx)
-{
-    int ret;
-    struct spng_chunk *chunk = &ctx->current_chunk;
-    const unsigned char *data;
-
-    chunk->offset = 8;
-    chunk->length = 13;
-    size_t sizeof_sig_ihdr = 29;
-
-    ret = read_data(ctx, sizeof_sig_ihdr);
-    if(ret) return ret;
-
-    data = ctx->data;
-
-    if(memcmp(data, spng_signature, sizeof(spng_signature))) return SPNG_ESIGNATURE;
-
-    chunk->length = read_u32(data + 8);
-    memcpy(&chunk->type, data + 12, 4);
-
-    if(chunk->length != 13) return SPNG_EIHDR_SIZE;
-    if(memcmp(chunk->type, type_ihdr, 4)) return SPNG_ENOIHDR;
-
-    ctx->cur_actual_crc = crc32(0, NULL, 0);
-    ctx->cur_actual_crc = crc32(ctx->cur_actual_crc, data + 12, 17);
-
-    ctx->ihdr.width = read_u32(data + 16);
-    ctx->ihdr.height = read_u32(data + 20);
-    ctx->ihdr.bit_depth = data[24];
-    ctx->ihdr.color_type = data[25];
-    ctx->ihdr.compression_method = data[26];
-    ctx->ihdr.filter_method = data[27];
-    ctx->ihdr.interlace_method = data[28];
-
-    ret = check_ihdr(&ctx->ihdr, ctx->max_width, ctx->max_height);
-    if(ret) return ret;
-
-    ctx->file.ihdr = 1;
-    ctx->stored.ihdr = 1;
-
-    if(ctx->ihdr.bit_depth < 8) ctx->bytes_per_pixel = 1;
-    else ctx->bytes_per_pixel = num_channels(&ctx->ihdr) * (ctx->ihdr.bit_depth / 8);
-
-    ret = calculate_subimages(ctx);
-    if(ret) return ret;
-
-    return 0;
-}
-
-static void splt_undo(spng_ctx *ctx)
-{
-    struct spng_splt *splt = &ctx->splt_list[ctx->n_splt - 1];
-
-    spng__free(ctx, splt->entries);
-
-    decrease_cache_usage(ctx, sizeof(struct spng_splt));
-    decrease_cache_usage(ctx, splt->n_entries * sizeof(struct spng_splt_entry));
-
-    splt->entries = NULL;
-
-    ctx->n_splt--;
-}
-
-static void text_undo(spng_ctx *ctx)
-{
-    struct spng_text2 *text = &ctx->text_list[ctx->n_text - 1];
-
-    spng__free(ctx, text->keyword);
-    if(text->compression_flag) spng__free(ctx, text->text);
-
-    decrease_cache_usage(ctx, text->cache_usage);
-    decrease_cache_usage(ctx, sizeof(struct spng_text2));
-
-    text->keyword = NULL;
-    text->text = NULL;
-
-    ctx->n_text--;
-}
-
-static void chunk_undo(spng_ctx *ctx)
-{
-    struct spng_unknown_chunk *chunk = &ctx->chunk_list[ctx->n_chunks - 1];
-
-    spng__free(ctx, chunk->data);
-
-    decrease_cache_usage(ctx, chunk->length);
-    decrease_cache_usage(ctx, sizeof(struct spng_unknown_chunk));
-
-    chunk->data = NULL;
-
-    ctx->n_chunks--;
-}
-
-static int read_non_idat_chunks(spng_ctx *ctx)
-{
-    int ret;
-    struct spng_chunk chunk;
-    const unsigned char *data;
-
-    ctx->discard = 0;
-    ctx->undo = NULL;
-    ctx->prev_stored = ctx->stored;
-
-    while( !(ret = read_header(ctx)))
-    {
-        if(ctx->discard)
-        {
-            if(ctx->undo) ctx->undo(ctx);
-
-            ctx->stored = ctx->prev_stored;
-        }
-
-        ctx->discard = 0;
-        ctx->undo = NULL;
-
-        ctx->prev_stored = ctx->stored;
-        chunk = ctx->current_chunk;
-
-        if(!memcmp(chunk.type, type_idat, 4))
-        {
-            if(ctx->state < SPNG_STATE_FIRST_IDAT)
-            {
-                if(ctx->ihdr.color_type == 3 && !ctx->stored.plte) return SPNG_ENOPLTE;
-
-                ctx->first_idat = chunk;
-                return 0;
-            }
-
-            if(ctx->prev_was_idat)
-            {
-                /* Ignore extra IDAT's */
-                ret = discard_chunk_bytes(ctx, chunk.length);
-                if(ret) return ret;
-
-                continue;
-            }
-            else return SPNG_ECHUNK_POS; /* IDAT chunk not at the end of the IDAT sequence */
-        }
-
-        ctx->prev_was_idat = 0;
-
-        if(is_small_chunk(chunk.type))
-        {
-            /* None of the known chunks can be zero length */
-            if(!chunk.length) return SPNG_ECHUNK_SIZE;
-
-            /* The largest of these chunks is PLTE with 256 entries */
-            ret = read_chunk_bytes(ctx, chunk.length > 768 ? 768 : chunk.length);
-            if(ret) return ret;
-        }
-
-        data = ctx->data;
-
-        if(is_critical_chunk(&chunk))
-        {
-            if(!memcmp(chunk.type, type_plte, 4))
-            {
-                if(ctx->file.trns || ctx->file.hist || ctx->file.bkgd) return SPNG_ECHUNK_POS;
-                if(chunk.length % 3 != 0) return SPNG_ECHUNK_SIZE;
-
-                ctx->plte.n_entries = chunk.length / 3;
-
-                if(check_plte(&ctx->plte, &ctx->ihdr)) return SPNG_ECHUNK_SIZE; /* XXX: EPLTE? */
-
-                size_t i;
-                for(i=0; i < ctx->plte.n_entries; i++)
-                {
-                    ctx->plte.entries[i].red   = data[i * 3];
-                    ctx->plte.entries[i].green = data[i * 3 + 1];
-                    ctx->plte.entries[i].blue  = data[i * 3 + 2];
-                }
-
-                ctx->file.plte = 1;
-                ctx->stored.plte = 1;
-            }
-            else if(!memcmp(chunk.type, type_iend, 4))
-            {
-                if(ctx->state == SPNG_STATE_AFTER_IDAT)
-                {
-                    if(chunk.length) return SPNG_ECHUNK_SIZE;
-
-                    ret = read_and_check_crc(ctx);
-                    if(ret == -SPNG_CRC_DISCARD) ret = 0;
-
-                    return ret;
-                }
-                else return SPNG_ECHUNK_POS;
-            }
-            else if(!memcmp(chunk.type, type_ihdr, 4)) return SPNG_ECHUNK_POS;
-            else return SPNG_ECHUNK_UNKNOWN_CRITICAL;
-        }
-        else if(!memcmp(chunk.type, type_chrm, 4)) /* Ancillary chunks */
-        {
-            if(ctx->file.plte) return SPNG_ECHUNK_POS;
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.chrm) return SPNG_EDUP_CHRM;
-
-            if(chunk.length != 32) return SPNG_ECHUNK_SIZE;
-
-            ctx->chrm_int.white_point_x = read_u32(data);
-            ctx->chrm_int.white_point_y = read_u32(data + 4);
-            ctx->chrm_int.red_x = read_u32(data + 8);
-            ctx->chrm_int.red_y = read_u32(data + 12);
-            ctx->chrm_int.green_x = read_u32(data + 16);
-            ctx->chrm_int.green_y = read_u32(data + 20);
-            ctx->chrm_int.blue_x = read_u32(data + 24);
-            ctx->chrm_int.blue_y = read_u32(data + 28);
-
-            if(check_chrm_int(&ctx->chrm_int)) return SPNG_ECHRM;
-
-            ctx->file.chrm = 1;
-            ctx->stored.chrm = 1;
-        }
-        else if(!memcmp(chunk.type, type_gama, 4))
-        {
-            if(ctx->file.plte) return SPNG_ECHUNK_POS;
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.gama) return SPNG_EDUP_GAMA;
-
-            if(chunk.length != 4) return SPNG_ECHUNK_SIZE;
-
-            ctx->gama = read_u32(data);
-
-            if(!ctx->gama) return SPNG_EGAMA;
-            if(ctx->gama > spng_u32max) return SPNG_EGAMA;
-
-            ctx->file.gama = 1;
-            ctx->stored.gama = 1;
-        }
-        else if(!memcmp(chunk.type, type_sbit, 4))
-        {
-            if(ctx->file.plte) return SPNG_ECHUNK_POS;
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.sbit) return SPNG_EDUP_SBIT;
-
-            if(ctx->ihdr.color_type == 0)
-            {
-                if(chunk.length != 1) return SPNG_ECHUNK_SIZE;
-
-                ctx->sbit.grayscale_bits = data[0];
-            }
-            else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 3)
-            {
-                if(chunk.length != 3) return SPNG_ECHUNK_SIZE;
-
-                ctx->sbit.red_bits = data[0];
-                ctx->sbit.green_bits = data[1];
-                ctx->sbit.blue_bits = data[2];
-            }
-            else if(ctx->ihdr.color_type == 4)
-            {
-                if(chunk.length != 2) return SPNG_ECHUNK_SIZE;
-
-                ctx->sbit.grayscale_bits = data[0];
-                ctx->sbit.alpha_bits = data[1];
-            }
-            else if(ctx->ihdr.color_type == 6)
-            {
-                if(chunk.length != 4) return SPNG_ECHUNK_SIZE;
-
-                ctx->sbit.red_bits = data[0];
-                ctx->sbit.green_bits = data[1];
-                ctx->sbit.blue_bits = data[2];
-                ctx->sbit.alpha_bits = data[3];
-            }
-
-            if(check_sbit(&ctx->sbit, &ctx->ihdr)) return SPNG_ESBIT;
-
-            ctx->file.sbit = 1;
-            ctx->stored.sbit = 1;
-        }
-        else if(!memcmp(chunk.type, type_srgb, 4))
-        {
-            if(ctx->file.plte) return SPNG_ECHUNK_POS;
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.srgb) return SPNG_EDUP_SRGB;
-
-            if(chunk.length != 1) return SPNG_ECHUNK_SIZE;
-
-            ctx->srgb_rendering_intent = data[0];
-
-            if(ctx->srgb_rendering_intent > 3) return SPNG_ESRGB;
-
-            ctx->file.srgb = 1;
-            ctx->stored.srgb = 1;
-        }
-        else if(!memcmp(chunk.type, type_bkgd, 4))
-        {
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.bkgd) return SPNG_EDUP_BKGD;
-
-            if(ctx->ihdr.color_type == 0 || ctx->ihdr.color_type == 4)
-            {
-                if(chunk.length != 2) return SPNG_ECHUNK_SIZE;
-
-                ctx->bkgd.gray = read_u16(data);
-            }
-            else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 6)
-            {
-                if(chunk.length != 6) return SPNG_ECHUNK_SIZE;
-
-                ctx->bkgd.red = read_u16(data);
-                ctx->bkgd.green = read_u16(data + 2);
-                ctx->bkgd.blue = read_u16(data + 4);
-            }
-            else if(ctx->ihdr.color_type == 3)
-            {
-                if(chunk.length != 1) return SPNG_ECHUNK_SIZE;
-                if(!ctx->file.plte) return SPNG_EBKGD_NO_PLTE;
-
-                ctx->bkgd.plte_index = data[0];
-                if(ctx->bkgd.plte_index >= ctx->plte.n_entries) return SPNG_EBKGD_PLTE_IDX;
-            }
-
-            ctx->file.bkgd = 1;
-            ctx->stored.bkgd = 1;
-        }
-        else if(!memcmp(chunk.type, type_trns, 4))
-        {
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.trns) return SPNG_EDUP_TRNS;
-            if(!chunk.length) return SPNG_ECHUNK_SIZE;
-
-            if(ctx->ihdr.color_type == 0)
-            {
-                if(chunk.length != 2) return SPNG_ECHUNK_SIZE;
-
-                ctx->trns.gray = read_u16(data);
-            }
-            else if(ctx->ihdr.color_type == 2)
-            {
-                if(chunk.length != 6) return SPNG_ECHUNK_SIZE;
-
-                ctx->trns.red = read_u16(data);
-                ctx->trns.green = read_u16(data + 2);
-                ctx->trns.blue = read_u16(data + 4);
-            }
-            else if(ctx->ihdr.color_type == 3)
-            {
-                if(chunk.length > ctx->plte.n_entries) return SPNG_ECHUNK_SIZE;
-                if(!ctx->file.plte) return SPNG_ETRNS_NO_PLTE;
-
-                memcpy(ctx->trns.type3_alpha, data, chunk.length);
-                ctx->trns.n_type3_entries = chunk.length;
-            }
-
-            if(ctx->ihdr.color_type == 4 || ctx->ihdr.color_type == 6)  return SPNG_ETRNS_COLOR_TYPE;
-
-            ctx->file.trns = 1;
-            ctx->stored.trns = 1;
-        }
-        else if(!memcmp(chunk.type, type_hist, 4))
-        {
-            if(!ctx->file.plte) return SPNG_EHIST_NO_PLTE;
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.hist) return SPNG_EDUP_HIST;
-
-            if( (chunk.length / 2) != (ctx->plte.n_entries) ) return SPNG_ECHUNK_SIZE;
-
-            size_t k;
-            for(k=0; k < (chunk.length / 2); k++)
-            {
-                ctx->hist.frequency[k] = read_u16(data + k*2);
-            }
-
-            ctx->file.hist = 1;
-            ctx->stored.hist = 1;
-        }
-        else if(!memcmp(chunk.type, type_phys, 4))
-        {
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.phys) return SPNG_EDUP_PHYS;
-
-            if(chunk.length != 9) return SPNG_ECHUNK_SIZE;
-
-            ctx->phys.ppu_x = read_u32(data);
-            ctx->phys.ppu_y = read_u32(data + 4);
-            ctx->phys.unit_specifier = data[8];
-
-            if(check_phys(&ctx->phys)) return SPNG_EPHYS;
-
-            ctx->file.phys = 1;
-            ctx->stored.phys = 1;
-        }
-        else if(!memcmp(chunk.type, type_time, 4))
-        {
-            if(ctx->file.time) return SPNG_EDUP_TIME;
-
-            if(chunk.length != 7) return SPNG_ECHUNK_SIZE;
-
-            struct spng_time time;
-
-            time.year = read_u16(data);
-            time.month = data[2];
-            time.day = data[3];
-            time.hour = data[4];
-            time.minute = data[5];
-            time.second = data[6];
-
-            if(check_time(&time)) return SPNG_ETIME;
-
-            ctx->file.time = 1;
-
-            if(!ctx->user.time) ctx->time = time;
-
-            ctx->stored.time = 1;
-        }
-        else if(!memcmp(chunk.type, type_offs, 4))
-        {
-            if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-            if(ctx->file.offs) return SPNG_EDUP_OFFS;
-
-            if(chunk.length != 9) return SPNG_ECHUNK_SIZE;
-
-            ctx->offs.x = read_s32(data);
-            ctx->offs.y = read_s32(data + 4);
-            ctx->offs.unit_specifier = data[8];
-
-            if(check_offs(&ctx->offs)) return SPNG_EOFFS;
-
-            ctx->file.offs = 1;
-            ctx->stored.offs = 1;
-        }
-        else /* Arbitrary-length chunk */
-        {
-
-            if(!memcmp(chunk.type, type_exif, 4))
-            {
-                if(ctx->file.exif) return SPNG_EDUP_EXIF;
-                if(!chunk.length) return SPNG_EEXIF;
-
-                ctx->file.exif = 1;
-
-                if(ctx->user.exif) goto discard;
-
-                if(increase_cache_usage(ctx, chunk.length, 1)) return SPNG_ECHUNK_LIMITS;
-
-                struct spng_exif exif;
-
-                exif.length = chunk.length;
-
-                exif.data = spng__malloc(ctx, chunk.length);
-                if(exif.data == NULL) return SPNG_EMEM;
-
-                ret = read_chunk_bytes2(ctx, exif.data, chunk.length);
-                if(ret)
-                {
-                    spng__free(ctx, exif.data);
-                    return ret;
-                }
-
-                if(check_exif(&exif))
-                {
-                    spng__free(ctx, exif.data);
-                    return SPNG_EEXIF;
-                }
-
-                ctx->exif = exif;
-
-                ctx->stored.exif = 1;
-            }
-            else if(!memcmp(chunk.type, type_iccp, 4))
-            {/* TODO: add test file with color profile */
-                if(ctx->file.plte) return SPNG_ECHUNK_POS;
-                if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-                if(ctx->file.iccp) return SPNG_EDUP_ICCP;
-                if(!chunk.length) return SPNG_ECHUNK_SIZE;
-
-                ctx->file.iccp = 1;
-
-                uint32_t peek_bytes =  81 > chunk.length ? chunk.length : 81;
-
-                ret = read_chunk_bytes(ctx, peek_bytes);
-                if(ret) return ret;
-
-                unsigned char *keyword_nul = memchr(ctx->data, '\0', peek_bytes);
-                if(keyword_nul == NULL) return SPNG_EICCP_NAME;
-
-                uint32_t keyword_len = keyword_nul - ctx->data;
-
-                if(keyword_len > 79) return SPNG_EICCP_NAME;
-
-                memcpy(ctx->iccp.profile_name, ctx->data, keyword_len);
-
-                if(check_png_keyword(ctx->iccp.profile_name)) return SPNG_EICCP_NAME;
-
-                if(chunk.length < (keyword_len + 2)) return SPNG_ECHUNK_SIZE;
-
-                if(ctx->data[keyword_len + 1] != 0) return SPNG_EICCP_COMPRESSION_METHOD;
-
-                ret = spng__inflate_stream(ctx, &ctx->iccp.profile, &ctx->iccp.profile_len, 0, ctx->data + keyword_len + 2, peek_bytes - (keyword_len + 2));
-
-                if(ret) return ret;
-
-                ctx->stored.iccp = 1;
-            }
-             else if(!memcmp(chunk.type, type_text, 4) ||
-                     !memcmp(chunk.type, type_ztxt, 4) ||
-                     !memcmp(chunk.type, type_itxt, 4))
-            {
-                if(!chunk.length) return SPNG_ECHUNK_SIZE;
-
-                ctx->file.text = 1;
-
-                if(ctx->user.text) goto discard;
-
-                if(increase_cache_usage(ctx, sizeof(struct spng_text2), 1)) return SPNG_ECHUNK_LIMITS;
-
-                ctx->n_text++;
-                if(ctx->n_text < 1) return SPNG_EOVERFLOW;
-                if(sizeof(struct spng_text2) > SIZE_MAX / ctx->n_text) return SPNG_EOVERFLOW;
-
-                void *buf = spng__realloc(ctx, ctx->text_list, ctx->n_text * sizeof(struct spng_text2));
-                if(buf == NULL) return SPNG_EMEM;
-                ctx->text_list = buf;
-
-                struct spng_text2 *text = &ctx->text_list[ctx->n_text - 1];
-                memset(text, 0, sizeof(struct spng_text2));
-
-                ctx->undo = text_undo;
-
-                uint32_t text_offset = 0, language_tag_offset = 0, translated_keyword_offset = 0;
-                uint32_t peek_bytes = 256; /* enough for 3 80-byte keywords and some text bytes */
-                uint32_t keyword_len;
-
-                if(peek_bytes > chunk.length) peek_bytes = chunk.length;
-
-                ret = read_chunk_bytes(ctx, peek_bytes);
-                if(ret) return ret;
-
-                data = ctx->data;
-
-                const unsigned char *zlib_stream = NULL;
-                const unsigned char *peek_end = data + peek_bytes;
-                const unsigned char *keyword_nul = memchr(data, 0, chunk.length > 80 ? 80 : chunk.length);
-
-                if(keyword_nul == NULL) return SPNG_ETEXT_KEYWORD;
-
-                keyword_len = keyword_nul - data;
-
-                if(!memcmp(chunk.type, type_text, 4))
-                {
-                    text->type = SPNG_TEXT;
-
-                    text->text_length = chunk.length - keyword_len - 1;
-
-                    text_offset = keyword_len;
-
-                    /* increment past nul if there is a text field */
-                    if(text->text_length) text_offset++;
-                }
-                else if(!memcmp(chunk.type, type_ztxt, 4))
-                {
-                    text->type = SPNG_ZTXT;
-
-                    if((peek_bytes - keyword_len) <= 2) return SPNG_EZTXT;
-
-                    if(keyword_nul[1]) return SPNG_EZTXT_COMPRESSION_METHOD;
-
-                    text->compression_flag = 1;
-
-                    text_offset = keyword_len + 2;
-                }
-                else if(!memcmp(chunk.type, type_itxt, 4))
-                {
-                    text->type = SPNG_ITXT;
-
-                    /* at least two 1-byte fields, two >=0 length strings, and one byte of (compressed) text */
-                    if((peek_bytes - keyword_len) < 5) return SPNG_EITXT;
-
-                    text->compression_flag = keyword_nul[1];
-
-                    if(text->compression_flag > 1) return SPNG_EITXT_COMPRESSION_FLAG;
-
-                    if(keyword_nul[2]) return SPNG_EITXT_COMPRESSION_METHOD;
-
-                    language_tag_offset = keyword_len + 3;
-
-                    const unsigned char *term;
-                    term = memchr(data + language_tag_offset, 0, peek_bytes - language_tag_offset);
-                    if(term == NULL) return SPNG_EITXT_LANG_TAG;
-
-                    if((peek_end - term) < 2) return SPNG_EITXT;
-
-                    translated_keyword_offset = term - data + 1;
-
-                    zlib_stream = memchr(data + translated_keyword_offset, 0, peek_bytes - translated_keyword_offset);
-                    if(zlib_stream == NULL) return SPNG_EITXT;
-                    if(zlib_stream == peek_end) return SPNG_EITXT;
-
-                    text_offset = zlib_stream - data + 1;
-                    text->text_length = chunk.length - text_offset;
-                }
-                else return SPNG_EINTERNAL;
-
-
-                if(text->compression_flag)
-                {
-                    /* cache usage = peek_bytes + decompressed text size + nul */
-                    if(increase_cache_usage(ctx, peek_bytes, 0)) return SPNG_ECHUNK_LIMITS;
-
-                    text->keyword = spng__calloc(ctx, 1, peek_bytes);
-                    if(text->keyword == NULL) return SPNG_EMEM;
-
-                    memcpy(text->keyword, data, peek_bytes);
-
-                    zlib_stream = ctx->data + text_offset;
-
-                    ret = spng__inflate_stream(ctx, &text->text, &text->text_length, 1, zlib_stream, peek_bytes - text_offset);
-
-                    if(ret) return ret;
-
-                    text->text[text->text_length - 1] = '\0';
-                    text->cache_usage = text->text_length + peek_bytes;
-                }
-                else
-                {
-                    if(increase_cache_usage(ctx, chunk.length + 1, 0)) return SPNG_ECHUNK_LIMITS;
-
-                    text->keyword = spng__malloc(ctx, chunk.length + 1);
-                    if(text->keyword == NULL) return SPNG_EMEM;
-
-                    memcpy(text->keyword, data, peek_bytes);
-
-                    if(chunk.length > peek_bytes)
-                    {
-                        ret = read_chunk_bytes2(ctx, text->keyword + peek_bytes, chunk.length - peek_bytes);
-                        if(ret) return ret;
-                    }
-
-                    text->text = text->keyword + text_offset;
-
-                    text->text_length = chunk.length - text_offset;
-
-                    text->text[text->text_length] = '\0';
-                    text->cache_usage = chunk.length + 1;
-                }
-
-                if(check_png_keyword(text->keyword)) return SPNG_ETEXT_KEYWORD;
-
-                text->text_length = strlen(text->text);
-
-                if(text->type != SPNG_ITXT)
-                {
-                    language_tag_offset = keyword_len;
-                    translated_keyword_offset = keyword_len;
-
-                    if(ctx->strict && check_png_text(text->text, text->text_length))
-                    {
-                        if(text->type == SPNG_ZTXT) return SPNG_EZTXT;
-                        else return SPNG_ETEXT;
-                    }
-                }
-
-                text->language_tag = text->keyword + language_tag_offset;
-                text->translated_keyword = text->keyword + translated_keyword_offset;
-
-                ctx->stored.text = 1;
-            }
-            else if(!memcmp(chunk.type, type_splt, 4))
-            {
-                if(ctx->state == SPNG_STATE_AFTER_IDAT) return SPNG_ECHUNK_POS;
-                if(ctx->user.splt) goto discard; /* XXX: could check profile names for uniqueness */
-                if(!chunk.length) return SPNG_ECHUNK_SIZE;
-
-                ctx->file.splt = 1;
-
-                /* chunk.length + sizeof(struct spng_splt) + splt->n_entries * sizeof(struct spng_splt_entry) */
-                if(increase_cache_usage(ctx, chunk.length + sizeof(struct spng_splt), 1)) return SPNG_ECHUNK_LIMITS;
-
-                ctx->n_splt++;
-                if(ctx->n_splt < 1) return SPNG_EOVERFLOW;
-                if(sizeof(struct spng_splt) > SIZE_MAX / ctx->n_splt) return SPNG_EOVERFLOW;
-
-                void *buf = spng__realloc(ctx, ctx->splt_list, ctx->n_splt * sizeof(struct spng_splt));
-                if(buf == NULL) return SPNG_EMEM;
-                ctx->splt_list = buf;
-
-                struct spng_splt *splt = &ctx->splt_list[ctx->n_splt - 1];
-
-                memset(splt, 0, sizeof(struct spng_splt));
-
-                ctx->undo = splt_undo;
-
-                void *t = spng__malloc(ctx, chunk.length);
-                if(t == NULL) return SPNG_EMEM;
-
-                splt->entries = t; /* simplifies error handling */
-                data = t;
-
-                ret = read_chunk_bytes2(ctx, t, chunk.length);
-                if(ret) return ret;
-
-                uint32_t keyword_len = chunk.length < 80 ? chunk.length : 80;
-
-                const unsigned char *keyword_nul = memchr(data, 0, keyword_len);
-                if(keyword_nul == NULL) return SPNG_ESPLT_NAME;
-
-                keyword_len = keyword_nul - data;
-
-                memcpy(splt->name, data, keyword_len);
-
-                if(check_png_keyword(splt->name)) return SPNG_ESPLT_NAME;
-
-                uint32_t j;
-                for(j=0; j < (ctx->n_splt - 1); j++)
-                {
-                    if(!strcmp(ctx->splt_list[j].name, splt->name)) return SPNG_ESPLT_DUP_NAME;
-                }
-
-                if( (chunk.length - keyword_len) <= 2) return SPNG_ECHUNK_SIZE;
-
-                splt->sample_depth = data[keyword_len + 1];
-
-                uint32_t entries_len = chunk.length - keyword_len - 2;
-                if(!entries_len) return SPNG_ECHUNK_SIZE;
-
-                if(splt->sample_depth == 16)
-                {
-                    if(entries_len % 10 != 0) return SPNG_ECHUNK_SIZE;
-                    splt->n_entries = entries_len / 10;
-                }
-                else if(splt->sample_depth == 8)
-                {
-                    if(entries_len % 6 != 0) return SPNG_ECHUNK_SIZE;
-                    splt->n_entries = entries_len / 6;
-                }
-                else return SPNG_ESPLT_DEPTH;
-
-                if(!splt->n_entries) return SPNG_ECHUNK_SIZE;
-
-                size_t list_size = splt->n_entries;
-
-                if(list_size > SIZE_MAX / sizeof(struct spng_splt_entry)) return SPNG_EOVERFLOW;
-
-                list_size *= sizeof(struct spng_splt_entry);
-
-                if(increase_cache_usage(ctx, list_size, 0)) return SPNG_ECHUNK_LIMITS;
-
-                splt->entries = spng__malloc(ctx, list_size);
-                if(splt->entries == NULL)
-                {
-                    spng__free(ctx, t);
-                    return SPNG_EMEM;
-                }
-
-                data = (unsigned char*)t + keyword_len + 2;
-
-                uint32_t k;
-                if(splt->sample_depth == 16)
-                {
-                    for(k=0; k < splt->n_entries; k++)
-                    {
-                        splt->entries[k].red =   read_u16(data + k * 10);
-                        splt->entries[k].green = read_u16(data + k * 10 + 2);
-                        splt->entries[k].blue =  read_u16(data + k * 10 + 4);
-                        splt->entries[k].alpha = read_u16(data + k * 10 + 6);
-                        splt->entries[k].frequency = read_u16(data + k * 10 + 8);
-                    }
-                }
-                else if(splt->sample_depth == 8)
-                {
-                    for(k=0; k < splt->n_entries; k++)
-                    {
-                        splt->entries[k].red =   data[k * 6];
-                        splt->entries[k].green = data[k * 6 + 1];
-                        splt->entries[k].blue =  data[k * 6 + 2];
-                        splt->entries[k].alpha = data[k * 6 + 3];
-                        splt->entries[k].frequency = read_u16(data + k * 6 + 4);
-                    }
-                }
-
-                spng__free(ctx, t);
-                decrease_cache_usage(ctx, chunk.length);
-
-                ctx->stored.splt = 1;
-            }
-            else /* Unknown chunk */
-            {
-                ctx->file.unknown = 1;
-
-                if(!ctx->keep_unknown) goto discard;
-                if(ctx->user.unknown) goto discard;
-
-                if(increase_cache_usage(ctx, chunk.length + sizeof(struct spng_unknown_chunk), 1)) return SPNG_ECHUNK_LIMITS;
-
-                ctx->n_chunks++;
-                if(ctx->n_chunks < 1) return SPNG_EOVERFLOW;
-                if(sizeof(struct spng_unknown_chunk) > SIZE_MAX / ctx->n_chunks) return SPNG_EOVERFLOW;
-
-                void *buf = spng__realloc(ctx, ctx->chunk_list, ctx->n_chunks * sizeof(struct spng_unknown_chunk));
-                if(buf == NULL) return SPNG_EMEM;
-                ctx->chunk_list = buf;
-
-                struct spng_unknown_chunk *chunkp = &ctx->chunk_list[ctx->n_chunks - 1];
-
-                memset(chunkp, 0, sizeof(struct spng_unknown_chunk));
-
-                ctx->undo = chunk_undo;
-
-                memcpy(chunkp->type, chunk.type, 4);
-
-                if(ctx->state < SPNG_STATE_FIRST_IDAT)
-                {
-                    if(ctx->file.plte) chunkp->location = SPNG_AFTER_PLTE;
-                    else chunkp->location = SPNG_AFTER_IHDR;
-                }
-                else if(ctx->state >= SPNG_STATE_AFTER_IDAT) chunkp->location = SPNG_AFTER_IDAT;
-
-                if(chunk.length > 0)
-                {
-                    void *t = spng__malloc(ctx, chunk.length);
-                    if(t == NULL) return SPNG_EMEM;
-
-                    ret = read_chunk_bytes2(ctx, t, chunk.length);
-                    if(ret)
-                    {
-                        spng__free(ctx, t);
-                        return ret;
-                    }
-
-                    chunkp->length = chunk.length;
-                    chunkp->data = t;
-                }
-
-                ctx->stored.unknown = 1;
-            }
-
-discard:
-            ret = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left);
-            if(ret) return ret;
-        }
-
-    }
-
-    return ret;
-}
-
-/* Read chunks before or after the IDAT chunks depending on state */
-static int read_chunks(spng_ctx *ctx, int only_ihdr)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-    if(!ctx->state) return SPNG_EBADSTATE;
-    if(ctx->data == NULL)
-    {
-        if(ctx->encode_only) return 0;
-        else return SPNG_EINTERNAL;
-    }
-
-    int ret = 0;
-
-    if(ctx->state == SPNG_STATE_INPUT)
-    {
-        ret = read_ihdr(ctx);
-
-        if(ret) return decode_err(ctx, ret);
-
-        ctx->state = SPNG_STATE_IHDR;
-    }
-
-    if(only_ihdr) return 0;
-
-    if(ctx->state == SPNG_STATE_EOI)
-    {
-        ctx->state = SPNG_STATE_AFTER_IDAT;
-        ctx->prev_was_idat = 1;
-    }
-
-    while(ctx->state < SPNG_STATE_FIRST_IDAT || ctx->state == SPNG_STATE_AFTER_IDAT)
-    {
-        ret = read_non_idat_chunks(ctx);
-
-        if(!ret)
-        {
-            if(ctx->state < SPNG_STATE_FIRST_IDAT) ctx->state = SPNG_STATE_FIRST_IDAT;
-            else if(ctx->state == SPNG_STATE_AFTER_IDAT) ctx->state = SPNG_STATE_IEND;
-        }
-        else
-        {
-            switch(ret)
-            {
-                case SPNG_ECHUNK_POS:
-                case SPNG_ECHUNK_SIZE: /* size != expected size, SPNG_ECHUNK_STDLEN = invalid size */
-                case SPNG_EDUP_PLTE:
-                case SPNG_EDUP_CHRM:
-                case SPNG_EDUP_GAMA:
-                case SPNG_EDUP_ICCP:
-                case SPNG_EDUP_SBIT:
-                case SPNG_EDUP_SRGB:
-                case SPNG_EDUP_BKGD:
-                case SPNG_EDUP_HIST:
-                case SPNG_EDUP_TRNS:
-                case SPNG_EDUP_PHYS:
-                case SPNG_EDUP_TIME:
-                case SPNG_EDUP_OFFS:
-                case SPNG_EDUP_EXIF:
-                case SPNG_ECHRM:
-                case SPNG_ETRNS_COLOR_TYPE:
-                case SPNG_ETRNS_NO_PLTE:
-                case SPNG_EGAMA:
-                case SPNG_EICCP_NAME:
-                case SPNG_EICCP_COMPRESSION_METHOD:
-                case SPNG_ESBIT:
-                case SPNG_ESRGB:
-                case SPNG_ETEXT:
-                case SPNG_ETEXT_KEYWORD:
-                case SPNG_EZTXT:
-                case SPNG_EZTXT_COMPRESSION_METHOD:
-                case SPNG_EITXT:
-                case SPNG_EITXT_COMPRESSION_FLAG:
-                case SPNG_EITXT_COMPRESSION_METHOD:
-                case SPNG_EITXT_LANG_TAG:
-                case SPNG_EITXT_TRANSLATED_KEY:
-                case SPNG_EBKGD_NO_PLTE:
-                case SPNG_EBKGD_PLTE_IDX:
-                case SPNG_EHIST_NO_PLTE:
-                case SPNG_EPHYS:
-                case SPNG_ESPLT_NAME:
-                case SPNG_ESPLT_DUP_NAME:
-                case SPNG_ESPLT_DEPTH:
-                case SPNG_ETIME:
-                case SPNG_EOFFS:
-                case SPNG_EEXIF:
-                case SPNG_EZLIB:
-                {
-                    if(!ctx->strict && !is_critical_chunk(&ctx->current_chunk))
-                    {
-                        ret = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left);
-                        if(ret) return decode_err(ctx, ret);
-
-                        if(ctx->undo) ctx->undo(ctx);
-
-                        ctx->stored = ctx->prev_stored;
-
-                        ctx->discard = 0;
-                        ctx->undo = NULL;
-
-                        continue;
-                    }
-                    else return decode_err(ctx, ret);
-
-                    break;
-                }
-                default: return decode_err(ctx, ret);
-            }
-        }
-    }
-
-    return ret;
-}
-
-static int read_scanline(spng_ctx *ctx)
-{
-    int ret, pass = ctx->row_info.pass;
-    struct spng_row_info *ri = &ctx->row_info;
-    const struct spng_subimage *sub = ctx->subimage;
-    size_t scanline_width = sub[pass].scanline_width;
-    uint32_t scanline_idx = ri->scanline_idx;
-
-    uint8_t next_filter = 0;
-
-    if(scanline_idx == (sub[pass].height - 1) && ri->pass == ctx->last_pass)
-    {
-        ret = read_scanline_bytes(ctx, ctx->scanline, scanline_width - 1);
-    }
-    else
-    {
-        ret = read_scanline_bytes(ctx, ctx->scanline, scanline_width);
-        if(ret) return ret;
-
-        next_filter = ctx->scanline[scanline_width - 1];
-        if(next_filter > 4) ret = SPNG_EFILTER;
-    }
-
-    if(ret) return ret;
-
-    if(!scanline_idx && ri->filter > 1)
-    {
-        /* prev_scanline is all zeros for the first scanline */
-        memset(ctx->prev_scanline, 0, scanline_width);
-    }
-
-    if(ctx->ihdr.bit_depth == 16 && ctx->fmt != SPNG_FMT_RAW) u16_row_to_host(ctx->scanline, scanline_width - 1);
-
-    ret = defilter_scanline(ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, ri->filter);
-    if(ret) return ret;
-
-    ri->filter = next_filter;
-
-    return 0;
-}
-
-static int update_row_info(spng_ctx *ctx)
-{
-    int interlacing = ctx->ihdr.interlace_method;
-    struct spng_row_info *ri = &ctx->row_info;
-    const struct spng_subimage *sub = ctx->subimage;
-
-    if(ri->scanline_idx == (sub[ri->pass].height - 1)) /* Last scanline */
-    {
-        if(ri->pass == ctx->last_pass)
-        {
-            ctx->state = SPNG_STATE_EOI;
-
-            return SPNG_EOI;
-        }
-
-        ri->scanline_idx = 0;
-        ri->pass++;
-
-        /* Skip empty passes */
-        while( (!sub[ri->pass].width || !sub[ri->pass].height) && (ri->pass < ctx->last_pass)) ri->pass++;
-    }
-    else
-    {
-        ri->row_num++;
-        ri->scanline_idx++;
-    }
-
-    if(interlacing) ri->row_num = adam7_y_start[ri->pass] + ri->scanline_idx * adam7_y_delta[ri->pass];
-
-    return 0;
-}
-
-int spng_decode_scanline(spng_ctx *ctx, void *out, size_t len)
-{
-    if(ctx == NULL || out == NULL) return 1;
-
-    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
-
-    struct decode_flags f = ctx->decode_flags;
-
-    struct spng_row_info *ri = &ctx->row_info;
-    const struct spng_subimage *sub = ctx->subimage;
-
-    const struct spng_ihdr *ihdr = &ctx->ihdr;
-    const uint16_t *gamma_lut = ctx->gamma_lut;
-    unsigned char *trns_px = ctx->trns_px;
-    const struct spng_sbit *sb = &ctx->decode_sb;
-    const struct spng_plte_entry *plte = ctx->decode_plte.rgba;
-    struct spng__iter iter = spng__iter_init(ihdr->bit_depth, ctx->scanline);
-
-    const unsigned char *scanline;
-
-    const int pass = ri->pass;
-    const int fmt = ctx->fmt;
-    const size_t scanline_width = sub[pass].scanline_width;
-    const uint32_t width = sub[pass].width;
-    uint32_t k;
-    uint8_t r_8, g_8, b_8, a_8, gray_8;
-    uint16_t r_16, g_16, b_16, a_16, gray_16;
-    r_8=0; g_8=0; b_8=0; a_8=0; gray_8=0;
-    r_16=0; g_16=0; b_16=0; a_16=0; gray_16=0;
-    size_t pixel_size = 4; /* SPNG_FMT_RGBA8 */
-    size_t pixel_offset = 0;
-    unsigned char *pixel;
-    unsigned processing_depth = ihdr->bit_depth;
-
-    if(f.indexed) processing_depth = 8;
-
-    if(fmt == SPNG_FMT_RGBA16) pixel_size = 8;
-    else if(fmt == SPNG_FMT_RGB8) pixel_size = 3;
-
-    if(len < sub[pass].out_width) return SPNG_EBUFSIZ;
-
-    int ret = read_scanline(ctx);
-
-    if(ret) return decode_err(ctx, ret);
-
-    scanline = ctx->scanline;
-
-    for(k=0; k < width; k++)
-    {
-        pixel = (unsigned char*)out + pixel_offset;
-        pixel_offset += pixel_size;
-
-        if(f.same_layout)
-        {
-            if(f.zerocopy) break;
-
-            memcpy(out, scanline, scanline_width - 1);
-            break;
-        }
-
-        if(f.unpack)
-        {
-            unpack_scanline(out, scanline, width, ihdr->bit_depth, fmt);
-            break;
-        }
-
-        if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR)
-        {
-            if(ihdr->bit_depth == 16)
-            {
-                memcpy(&r_16, scanline + (k * 6), 2);
-                memcpy(&g_16, scanline + (k * 6) + 2, 2);
-                memcpy(&b_16, scanline + (k * 6) + 4, 2);
-
-                a_16 = 65535;
-            }
-            else /* == 8 */
-            {
-                if(fmt == SPNG_FMT_RGBA8)
-                {
-                    rgb8_row_to_rgba8(scanline, out, width);
-                    break;
-                }
-
-                r_8 = scanline[k * 3];
-                g_8 = scanline[k * 3 + 1];
-                b_8 = scanline[k * 3 + 2];
-
-                a_8 = 255;
-            }
-        }
-        else if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED)
-        {
-            uint8_t entry = 0;
-
-            if(ihdr->bit_depth == 8)
-            {
-                if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
-                {
-                    expand_row(out, scanline, &ctx->decode_plte, width, fmt);
-                    break;
-                }
-
-                entry = scanline[k];
-            }
-            else /* < 8 */
-            {
-                entry = get_sample(&iter);
-            }
-
-            if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
-            {
-                pixel[0] = plte[entry].red;
-                pixel[1] = plte[entry].green;
-                pixel[2] = plte[entry].blue;
-                if(fmt == SPNG_FMT_RGBA8) pixel[3] = plte[entry].alpha;
-
-                continue;
-            }
-            else /* RGBA16 */
-            {
-                r_16 = plte[entry].red;
-                g_16 = plte[entry].green;
-                b_16 = plte[entry].blue;
-                a_16 = plte[entry].alpha;
-
-                r_16 = (r_16 << 8) | r_16;
-                g_16 = (g_16 << 8) | g_16;
-                b_16 = (b_16 << 8) | b_16;
-                a_16 = (a_16 << 8) | a_16;
-
-                memcpy(pixel, &r_16, 2);
-                memcpy(pixel + 2, &g_16, 2);
-                memcpy(pixel + 4, &b_16, 2);
-                memcpy(pixel + 6, &a_16, 2);
-
-                continue;
-            }
-        }
-        else if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA)
-        {
-            if(ihdr->bit_depth == 16)
-            {
-                memcpy(&r_16, scanline + (k * 8), 2);
-                memcpy(&g_16, scanline + (k * 8) + 2, 2);
-                memcpy(&b_16, scanline + (k * 8) + 4, 2);
-                memcpy(&a_16, scanline + (k * 8) + 6, 2);
-            }
-            else /* == 8 */
-            {
-                r_8 = scanline[k * 4];
-                g_8 = scanline[k * 4 + 1];
-                b_8 = scanline[k * 4 + 2];
-                a_8 = scanline[k * 4 + 3];
-            }
-        }
-        else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE)
-        {
-            if(ihdr->bit_depth == 16)
-            {
-                memcpy(&gray_16, scanline + k * 2, 2);
-
-                if(f.apply_trns && ctx->trns.gray == gray_16) a_16 = 0;
-                else a_16 = 65535;
-
-                r_16 = gray_16;
-                g_16 = gray_16;
-                b_16 = gray_16;
-            }
-            else /* <= 8 */
-            {
-                gray_8 = get_sample(&iter);
-
-                if(f.apply_trns && ctx->trns.gray == gray_8) a_8 = 0;
-                else a_8 = 255;
-
-                r_8 = gray_8; g_8 = gray_8; b_8 = gray_8;
-            }
-        }
-        else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA)
-        {
-            if(ihdr->bit_depth == 16)
-            {
-                memcpy(&gray_16, scanline + (k * 4), 2);
-                memcpy(&a_16, scanline + (k * 4) + 2, 2);
-
-                r_16 = gray_16;
-                g_16 = gray_16;
-                b_16 = gray_16;
-            }
-            else /* == 8 */
-            {
-                gray_8 = scanline[k * 2];
-                a_8 = scanline[k * 2 + 1];
-
-                r_8 = gray_8;
-                g_8 = gray_8;
-                b_8 = gray_8;
-            }
-        }
-
-
-        if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
-        {
-            if(ihdr->bit_depth == 16)
-            {
-                r_8 = r_16 >> 8;
-                g_8 = g_16 >> 8;
-                b_8 = b_16 >> 8;
-                a_8 = a_16 >> 8;
-            }
-
-            pixel[0] = r_8;
-            pixel[1] = g_8;
-            pixel[2] = b_8;
-
-            if(fmt == SPNG_FMT_RGBA8) pixel[3] = a_8;
-        }
-        else if(fmt == SPNG_FMT_RGBA16)
-        {
-            if(ihdr->bit_depth != 16)
-            {
-                r_16 = r_8;
-                g_16 = g_8;
-                b_16 = b_8;
-                a_16 = a_8;
-            }
-
-            memcpy(pixel, &r_16, 2);
-            memcpy(pixel + 2, &g_16, 2);
-            memcpy(pixel + 4, &b_16, 2);
-            memcpy(pixel + 6, &a_16, 2);
-        }
-    }/* for(k=0; k < width; k++) */
-
-    if(f.apply_trns) trns_row(out, scanline, trns_px, ctx->bytes_per_pixel, &ctx->ihdr, width, fmt);
-
-    if(f.do_scaling) scale_row(out, width, fmt, processing_depth, sb);
-
-    if(f.apply_gamma) gamma_correct_row(out, width, fmt, gamma_lut);
-
-    /* The previous scanline is always defiltered */
-    void *t = ctx->prev_scanline;
-    ctx->prev_scanline = ctx->scanline;
-    ctx->scanline = t;
-
-    ret = update_row_info(ctx);
-
-    if(ret == SPNG_EOI)
-    {
-        if(ctx->cur_chunk_bytes_left) /* zlib stream ended before an IDAT chunk boundary */
-        {/* Discard the rest of the chunk */
-            int error = discard_chunk_bytes(ctx, ctx->cur_chunk_bytes_left);
-            if(error) return decode_err(ctx, error);
-        }
-
-        ctx->last_idat = ctx->current_chunk;
-    }
-
-    return ret;
-}
-
-int spng_decode_row(spng_ctx *ctx, void *out, size_t len)
-{
-    if(ctx == NULL || out == NULL) return 1;
-    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
-    if(len < ctx->image_width) return SPNG_EBUFSIZ;
-
-    const struct spng_ihdr *ihdr = &ctx->ihdr;
-    int ret, pass = ctx->row_info.pass;
-    unsigned char *outptr = out;
-
-    if(!ihdr->interlace_method || pass == 6) return spng_decode_scanline(ctx, out, len);
-
-    ret = spng_decode_scanline(ctx, ctx->row, ctx->image_width);
-    if(ret && ret != SPNG_EOI) return ret;
-
-    uint32_t k;
-    unsigned pixel_size = 4; /* RGBA8 */
-    if(ctx->fmt == SPNG_FMT_RGBA16) pixel_size = 8;
-    else if(ctx->fmt == SPNG_FMT_RGB8) pixel_size = 3;
-    else if(ctx->fmt == SPNG_FMT_G8) pixel_size = 1;
-    else if(ctx->fmt == SPNG_FMT_GA8) pixel_size = 2;
-    else if(ctx->fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW))
-    {
-        if(ihdr->bit_depth < 8)
-        {
-            struct spng__iter iter = spng__iter_init(ihdr->bit_depth, ctx->row);
-            const uint8_t samples_per_byte = 8 / ihdr->bit_depth;
-            uint8_t sample;
-
-            for(k=0; k < ctx->subimage[pass].width; k++)
-            {
-                sample = get_sample(&iter);
-
-                size_t ioffset = adam7_x_start[pass] + k * adam7_x_delta[pass];
-
-                sample = sample << (iter.initial_shift - ioffset * ihdr->bit_depth % 8);
-
-                ioffset /= samples_per_byte;
-
-                outptr[ioffset] |= sample;
-            }
-
-            return 0;
-        }
-        else pixel_size = ctx->bytes_per_pixel;
-    }
-
-    for(k=0; k < ctx->subimage[pass].width; k++)
-    {
-        size_t ioffset = (adam7_x_start[pass] + (size_t) k * adam7_x_delta[pass]) * pixel_size;
-
-        memcpy(outptr + ioffset, ctx->row + k * pixel_size, pixel_size);
-    }
-
-    return 0;
-}
-
-int spng_decode_chunks(spng_ctx *ctx)
-{
-    if(ctx == NULL) return 1;
-    if(ctx->encode_only) return SPNG_ECTXTYPE;
-    if(ctx->state < SPNG_STATE_INPUT) return SPNG_ENOSRC;
-    if(ctx->state == SPNG_STATE_IEND) return 0;
-
-    return read_chunks(ctx, 0);
-}
-
-int spng_decode_image(spng_ctx *ctx, void *out, size_t len, int fmt, int flags)
-{
-    if(ctx == NULL) return 1;
-    if(ctx->encode_only) return SPNG_ECTXTYPE;
-    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
-
-    const struct spng_ihdr *ihdr = &ctx->ihdr;
-
-    int ret = read_chunks(ctx, 0);
-    if(ret) return decode_err(ctx, ret);
-
-    ret = check_decode_fmt(ihdr, fmt);
-    if(ret) return ret;
-
-    ret = calculate_image_width(ihdr, fmt, &ctx->image_width);
-    if(ret) return decode_err(ctx, ret);
-
-    if(ctx->image_width > SIZE_MAX / ihdr->height) ctx->image_size = 0; /* overflow */
-    else ctx->image_size = ctx->image_width * ihdr->height;
-
-    if( !(flags & SPNG_DECODE_PROGRESSIVE) )
-    {
-        if(out == NULL) return 1;
-        if(!ctx->image_size) return SPNG_EOVERFLOW;
-        if(len < ctx->image_size) return SPNG_EBUFSIZ;
-    }
-
-    uint32_t bytes_read = 0;
-
-    ret = read_idat_bytes(ctx, &bytes_read);
-    if(ret) return decode_err(ctx, ret);
-
-    if(bytes_read > 1)
-    {
-        int valid = read_u16(ctx->data) % 31 ? 0 : 1;
-
-        unsigned flg = ctx->data[1];
-        unsigned flevel = flg >> 6;
-        int compression_level = Z_DEFAULT_COMPRESSION;
-
-        if(flevel == 0) compression_level = 0; /* fastest */
-        else if(flevel == 1) compression_level = 1; /* fast */
-        else if(flevel == 2) compression_level = 6; /* default */
-        else if(flevel == 3) compression_level = 9; /* slowest, max compression */
-
-        if(valid) ctx->image_options.compression_level = compression_level;
-    }
-
-    ret = spng__inflate_init(ctx, ctx->image_options.window_bits);
-    if(ret) return decode_err(ctx, ret);
-
-    ctx->zstream.avail_in = bytes_read;
-    ctx->zstream.next_in = ctx->data;
-
-    size_t scanline_buf_size = ctx->subimage[ctx->widest_pass].scanline_width;
-
-    scanline_buf_size += 32;
-
-    if(scanline_buf_size < 32) return SPNG_EOVERFLOW;
-
-    ctx->scanline_buf = spng__malloc(ctx, scanline_buf_size);
-    ctx->prev_scanline_buf = spng__malloc(ctx, scanline_buf_size);
-
-    ctx->scanline = ctx->scanline_buf;
-    ctx->prev_scanline = ctx->prev_scanline_buf;
-
-    struct decode_flags f = {0};
-
-    ctx->fmt = fmt;
-
-    if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED) f.indexed = 1;
-
-    unsigned processing_depth = ihdr->bit_depth;
-
-    if(f.indexed) processing_depth = 8;
-
-    if(ihdr->interlace_method)
-    {
-        f.interlaced = 1;
-        ctx->row_buf = spng__malloc(ctx, ctx->image_width);
-        ctx->row = ctx->row_buf;
-
-        if(ctx->row == NULL) return decode_err(ctx, SPNG_EMEM);
-    }
-
-    if(ctx->scanline == NULL || ctx->prev_scanline == NULL)
-    {
-        return decode_err(ctx, SPNG_EMEM);
-    }
-
-    f.do_scaling = 1;
-    if(f.indexed) f.do_scaling = 0;
-
-    unsigned depth_target = 8; /* FMT_RGBA8, G8 */
-    if(fmt == SPNG_FMT_RGBA16) depth_target = 16;
-
-    if(flags & SPNG_DECODE_TRNS && ctx->stored.trns) f.apply_trns = 1;
-    else flags &= ~SPNG_DECODE_TRNS;
-
-    if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA ||
-       ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA) flags &= ~SPNG_DECODE_TRNS;
-
-    if(flags & SPNG_DECODE_GAMMA && ctx->stored.gama) f.apply_gamma = 1;
-    else flags &= ~SPNG_DECODE_GAMMA;
-
-    if(flags & SPNG_DECODE_USE_SBIT && ctx->stored.sbit) f.use_sbit = 1;
-    else flags &= ~SPNG_DECODE_USE_SBIT;
-
-    if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGBA16))
-    {
-        if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR_ALPHA &&
-           ihdr->bit_depth == depth_target) f.same_layout = 1;
-    }
-    else if(fmt == SPNG_FMT_RGB8)
-    {
-        if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR &&
-           ihdr->bit_depth == depth_target) f.same_layout = 1;
-
-        f.apply_trns = 0; /* not applicable */
-    }
-    else if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW))
-    {
-        f.same_layout = 1;
-        f.do_scaling = 0;
-        f.apply_gamma = 0; /* for now */
-        f.apply_trns = 0;
-    }
-    else if(fmt == SPNG_FMT_G8 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8)
-    {
-        if(ihdr->bit_depth == depth_target) f.same_layout = 1;
-        else if(ihdr->bit_depth < 8) f.unpack = 1;
-
-        f.apply_trns = 0;
-    }
-    else if(fmt == SPNG_FMT_GA8 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth <= 8)
-    {
-        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA &&
-           ihdr->bit_depth == depth_target) f.same_layout = 1;
-        else if(ihdr->bit_depth <= 8) f.unpack = 1;
-    }
-    else if(fmt == SPNG_FMT_GA16 && ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE && ihdr->bit_depth == 16)
-    {
-        if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE_ALPHA &&
-           ihdr->bit_depth == depth_target) f.same_layout = 1;
-        else if(ihdr->bit_depth == 16) f.unpack = 1;
-    }
-
-    /*if(f.same_layout && !flags && !f.interlaced) f.zerocopy = 1;*/
-
-    uint16_t *gamma_lut = NULL;
-
-    if(f.apply_gamma)
-    {
-        float file_gamma = (float)ctx->gama / 100000.0f;
-        float max;
-
-        unsigned lut_entries;
-
-        if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
-        {
-            lut_entries = 256;
-            max = 255.0f;
-
-            gamma_lut = ctx->gamma_lut8;
-            ctx->gamma_lut = ctx->gamma_lut8;
-        }
-        else /* SPNG_FMT_RGBA16 */
-        {
-            lut_entries = 65536;
-            max = 65535.0f;
-
-            ctx->gamma_lut16 = spng__malloc(ctx, lut_entries * sizeof(uint16_t));
-            if(ctx->gamma_lut16 == NULL) return decode_err(ctx, SPNG_EMEM);
-
-            gamma_lut = ctx->gamma_lut16;
-            ctx->gamma_lut = ctx->gamma_lut16;
-        }
-
-        float screen_gamma = 2.2f;
-        float exponent = file_gamma * screen_gamma;
-
-        if(FP_ZERO == fpclassify(exponent)) return decode_err(ctx, SPNG_EGAMA);
-
-        exponent = 1.0f / exponent;
-
-        unsigned i;
-        for(i=0; i < lut_entries; i++)
-        {
-            float c = pow((float)i / max, exponent) * max;
-            if(c > max) c = max;
-
-            gamma_lut[i] = (uint16_t)c;
-        }
-    }
-
-    struct spng_sbit *sb = &ctx->decode_sb;
-
-    sb->red_bits = processing_depth;
-    sb->green_bits = processing_depth;
-    sb->blue_bits = processing_depth;
-    sb->alpha_bits = processing_depth;
-    sb->grayscale_bits = processing_depth;
-
-    if(f.use_sbit)
-    {
-        if(ihdr->color_type == 0)
-        {
-            sb->grayscale_bits = ctx->sbit.grayscale_bits;
-            sb->alpha_bits = ihdr->bit_depth;
-        }
-        else if(ihdr->color_type == 2 || ihdr->color_type == 3)
-        {
-            sb->red_bits = ctx->sbit.red_bits;
-            sb->green_bits = ctx->sbit.green_bits;
-            sb->blue_bits = ctx->sbit.blue_bits;
-            sb->alpha_bits = ihdr->bit_depth;
-        }
-        else if(ihdr->color_type == 4)
-        {
-            sb->grayscale_bits = ctx->sbit.grayscale_bits;
-            sb->alpha_bits = ctx->sbit.alpha_bits;
-        }
-        else /* == 6 */
-        {
-            sb->red_bits = ctx->sbit.red_bits;
-            sb->green_bits = ctx->sbit.green_bits;
-            sb->blue_bits = ctx->sbit.blue_bits;
-            sb->alpha_bits = ctx->sbit.alpha_bits;
-        }
-    }
-
-    if(ihdr->bit_depth == 16 && fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGB8))
-    {/* samples are scaled down by 8 bits in the decode loop */
-        sb->red_bits -= 8;
-        sb->green_bits -= 8;
-        sb->blue_bits -= 8;
-        sb->alpha_bits -= 8;
-        sb->grayscale_bits -= 8;
-
-        processing_depth = 8;
-    }
-
-    /* Prevent infinite loops in sample_to_target() */
-    if(!depth_target || depth_target > 16 ||
-       !processing_depth || processing_depth > 16 ||
-       !sb->grayscale_bits || sb->grayscale_bits > processing_depth ||
-       !sb->alpha_bits || sb->alpha_bits > processing_depth ||
-       !sb->red_bits || sb->red_bits > processing_depth ||
-       !sb->green_bits || sb->green_bits > processing_depth ||
-       !sb->blue_bits || sb->blue_bits > processing_depth)
-    {
-        return decode_err(ctx, SPNG_ESBIT);
-    }
-
-    if(sb->red_bits == sb->green_bits &&
-       sb->green_bits == sb->blue_bits &&
-       sb->blue_bits == sb->alpha_bits &&
-       sb->alpha_bits == processing_depth &&
-       processing_depth == depth_target) f.do_scaling = 0;
-
-    struct spng_plte_entry *plte = ctx->decode_plte.rgba;
-
-    /* Pre-process palette entries */
-    if(f.indexed)
-    {
-        uint8_t red, green, blue, alpha;
-
-        uint32_t i;
-        for(i=0; i < 256; i++)
-        {
-            if(f.apply_trns && i < ctx->trns.n_type3_entries)
-                ctx->plte.entries[i].alpha = ctx->trns.type3_alpha[i];
-            else
-                ctx->plte.entries[i].alpha = 255;
-
-            red   = sample_to_target(ctx->plte.entries[i].red, 8, sb->red_bits, 8);
-            green = sample_to_target(ctx->plte.entries[i].green, 8, sb->green_bits, 8);
-            blue  = sample_to_target(ctx->plte.entries[i].blue, 8, sb->blue_bits, 8);
-            alpha = sample_to_target(ctx->plte.entries[i].alpha, 8, sb->alpha_bits, 8);
-
-#if defined(SPNG_ARM)
-            if(fmt == SPNG_FMT_RGB8 && ihdr->bit_depth == 8)
-            {/* Working with 3 bytes at a time is more of an ARM thing */
-                ctx->decode_plte.rgb[i * 3 + 0] = red;
-                ctx->decode_plte.rgb[i * 3 + 1] = green;
-                ctx->decode_plte.rgb[i * 3 + 2] = blue;
-                continue;
-            }
-#endif
-            plte[i].red = red;
-            plte[i].green = green;
-            plte[i].blue = blue;
-            plte[i].alpha = alpha;
-        }
-
-        f.apply_trns = 0;
-    }
-
-    unsigned char *trns_px = ctx->trns_px;
-
-    if(f.apply_trns)
-    {
-        uint16_t mask = ~0;
-        if(ctx->ihdr.bit_depth < 16) mask = (1 << ctx->ihdr.bit_depth) - 1;
-
-        if(fmt & (SPNG_FMT_RGBA8 | SPNG_FMT_RGBA16))
-        {
-            if(ihdr->color_type == SPNG_COLOR_TYPE_TRUECOLOR)
-            {
-                if(ihdr->bit_depth == 16)
-                {
-                    memcpy(trns_px, &ctx->trns.red, 2);
-                    memcpy(trns_px + 2, &ctx->trns.green, 2);
-                    memcpy(trns_px + 4, &ctx->trns.blue, 2);
-                }
-                else
-                {
-                    trns_px[0] = ctx->trns.red & mask;
-                    trns_px[1] = ctx->trns.green & mask;
-                    trns_px[2] = ctx->trns.blue & mask;
-                }
-            }
-        }
-        else if(ihdr->color_type == SPNG_COLOR_TYPE_GRAYSCALE) // fmt == SPNG_FMT_GA8 &&
-        {
-            if(ihdr->bit_depth == 16)
-            {
-                memcpy(trns_px, &ctx->trns.gray, 2);
-            }
-            else
-            {
-                trns_px[0] = ctx->trns.gray & mask;
-            }
-        }
-    }
-
-    ctx->decode_flags = f;
-
-    ctx->state = SPNG_STATE_DECODE_INIT;
-
-    struct spng_row_info *ri = &ctx->row_info;
-    struct spng_subimage *sub = ctx->subimage;
-
-    while(!sub[ri->pass].width || !sub[ri->pass].height) ri->pass++;
-
-    if(f.interlaced) ri->row_num = adam7_y_start[ri->pass];
-
-    unsigned pixel_size = 4; /* SPNG_FMT_RGBA8 */
-
-    if(fmt == SPNG_FMT_RGBA16) pixel_size = 8;
-    else if(fmt == SPNG_FMT_RGB8) pixel_size = 3;
-    else if(fmt == SPNG_FMT_G8) pixel_size = 1;
-    else if(fmt == SPNG_FMT_GA8) pixel_size = 2;
-
-    int i;
-    for(i=ri->pass; i <= ctx->last_pass; i++)
-    {
-        if(!sub[i].scanline_width) continue;
-
-        if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) sub[i].out_width = sub[i].scanline_width - 1;
-        else sub[i].out_width = (size_t)sub[i].width * pixel_size;
-
-        if(sub[i].out_width > UINT32_MAX) return decode_err(ctx, SPNG_EOVERFLOW);
-    }
-
-    /* Read the first filter byte, offsetting all reads by 1 byte.
-    The scanlines will be aligned with the start of the array with
-    the next scanline's filter byte at the end,
-    the last scanline will end up being 1 byte "shorter". */
-    ret = read_scanline_bytes(ctx, &ri->filter, 1);
-    if(ret) return decode_err(ctx, ret);
-
-    if(ri->filter > 4) return decode_err(ctx, SPNG_EFILTER);
-
-    if(flags & SPNG_DECODE_PROGRESSIVE)
-    {
-        return 0;
-    }
-
-    do
-    {
-        size_t ioffset = ri->row_num * ctx->image_width;
-
-        ret = spng_decode_row(ctx, (unsigned char*)out + ioffset, ctx->image_width);
-    }while(!ret);
-
-    if(ret != SPNG_EOI) return decode_err(ctx, ret);
-
-    return 0;
-}
-
-int spng_get_row_info(spng_ctx *ctx, struct spng_row_info *row_info)
-{
-    if(ctx == NULL || row_info == NULL || ctx->state < SPNG_STATE_DECODE_INIT) return 1;
-
-    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
-
-    *row_info = ctx->row_info;
-
-    return 0;
-}
-
-static int write_chunks_before_idat(spng_ctx *ctx)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-    if(!ctx->encode_only) return SPNG_EINTERNAL;
-    if(!ctx->stored.ihdr) return SPNG_EINTERNAL;
-
-    int ret;
-    uint32_t i;
-    size_t length;
-    const struct spng_ihdr *ihdr = &ctx->ihdr;
-    unsigned char *data = ctx->decode_plte.raw;
-
-    ret = write_data(ctx, spng_signature, 8);
-    if(ret) return ret;
-
-    write_u32(data,     ihdr->width);
-    write_u32(data + 4, ihdr->height);
-    data[8]  = ihdr->bit_depth;
-    data[9]  = ihdr->color_type;
-    data[10] = ihdr->compression_method;
-    data[11] = ihdr->filter_method;
-    data[12] = ihdr->interlace_method;
-
-    ret = write_chunk(ctx, type_ihdr, data, 13);
-    if(ret) return ret;
-
-    if(ctx->stored.chrm)
-    {
-        write_u32(data,      ctx->chrm_int.white_point_x);
-        write_u32(data + 4,  ctx->chrm_int.white_point_y);
-        write_u32(data + 8,  ctx->chrm_int.red_x);
-        write_u32(data + 12, ctx->chrm_int.red_y);
-        write_u32(data + 16, ctx->chrm_int.green_x);
-        write_u32(data + 20, ctx->chrm_int.green_y);
-        write_u32(data + 24, ctx->chrm_int.blue_x);
-        write_u32(data + 28, ctx->chrm_int.blue_y);
-
-        ret = write_chunk(ctx, type_chrm, data, 32);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.gama)
-    {
-        write_u32(data, ctx->gama);
-
-        ret = write_chunk(ctx, type_gama, data, 4);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.iccp)
-    {
-        uLongf dest_len = compressBound((uLong)ctx->iccp.profile_len);
-
-        Bytef *buf = spng__malloc(ctx, dest_len);
-        if(buf == NULL) return SPNG_EMEM;
-
-        ret = compress2(buf, &dest_len, (void*)ctx->iccp.profile, (uLong)ctx->iccp.profile_len, Z_DEFAULT_COMPRESSION);
-
-        if(ret != Z_OK)
-        {
-            spng__free(ctx, buf);
-            return SPNG_EZLIB;
-        }
-
-        size_t name_len = strlen(ctx->iccp.profile_name);
-
-        length = name_len + 2;
-        length += dest_len;
-
-        if(dest_len > length) return SPNG_EOVERFLOW;
-
-        unsigned char *cdata = NULL;
-
-        ret = write_header(ctx, type_iccp, length, &cdata);
-
-        if(ret)
-        {
-            spng__free(ctx, buf);
-            return ret;
-        }
-
-        memcpy(cdata, ctx->iccp.profile_name, name_len + 1);
-        cdata[name_len + 1] = 0; /* compression method */
-        memcpy(cdata + name_len + 2, buf, dest_len);
-
-        spng__free(ctx, buf);
-
-        ret = finish_chunk(ctx);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.sbit)
-    {
-        switch(ctx->ihdr.color_type)
-        {
-            case SPNG_COLOR_TYPE_GRAYSCALE:
-            {
-                length = 1;
-
-                data[0] = ctx->sbit.grayscale_bits;
-
-                break;
-            }
-            case SPNG_COLOR_TYPE_TRUECOLOR:
-            case SPNG_COLOR_TYPE_INDEXED:
-            {
-                length = 3;
-
-                data[0] = ctx->sbit.red_bits;
-                data[1] = ctx->sbit.green_bits;
-                data[2] = ctx->sbit.blue_bits;
-
-                break;
-            }
-            case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA:
-            {
-                length = 2;
-
-                data[0] = ctx->sbit.grayscale_bits;
-                data[1] = ctx->sbit.alpha_bits;
-
-                break;
-            }
-            case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA:
-            {
-                length = 4;
-
-                data[0] = ctx->sbit.red_bits;
-                data[1] = ctx->sbit.green_bits;
-                data[2] = ctx->sbit.blue_bits;
-                data[3] = ctx->sbit.alpha_bits;
-
-                break;
-            }
-            default: return SPNG_EINTERNAL;
-        }
-
-        ret = write_chunk(ctx, type_sbit, data, length);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.srgb)
-    {
-        ret = write_chunk(ctx, type_srgb, &ctx->srgb_rendering_intent, 1);
-        if(ret) return ret;
-    }
-
-    ret = write_unknown_chunks(ctx, SPNG_AFTER_IHDR);
-    if(ret) return ret;
-
-    if(ctx->stored.plte)
-    {
-        for(i=0; i < ctx->plte.n_entries; i++)
-        {
-            data[i * 3 + 0] = ctx->plte.entries[i].red;
-            data[i * 3 + 1] = ctx->plte.entries[i].green;
-            data[i * 3 + 2] = ctx->plte.entries[i].blue;
-        }
-
-        ret = write_chunk(ctx, type_plte, data, ctx->plte.n_entries * 3);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.bkgd)
-    {
-        switch(ctx->ihdr.color_type)
-        {
-            case SPNG_COLOR_TYPE_GRAYSCALE:
-            case SPNG_COLOR_TYPE_GRAYSCALE_ALPHA:
-            {
-                length = 2;
-
-                write_u16(data, ctx->bkgd.gray);
-
-                break;
-            }
-            case SPNG_COLOR_TYPE_TRUECOLOR:
-            case SPNG_COLOR_TYPE_TRUECOLOR_ALPHA:
-            {
-                length = 6;
-
-                write_u16(data,     ctx->bkgd.red);
-                write_u16(data + 2, ctx->bkgd.green);
-                write_u16(data + 4, ctx->bkgd.blue);
-
-                break;
-            }
-            case SPNG_COLOR_TYPE_INDEXED:
-            {
-                length = 1;
-
-                data[0] = ctx->bkgd.plte_index;
-
-                break;
-            }
-            default: return SPNG_EINTERNAL;
-        }
-
-        ret = write_chunk(ctx, type_bkgd, data, length);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.hist)
-    {
-        length = ctx->plte.n_entries * 2;
-
-        for(i=0; i < ctx->plte.n_entries; i++)
-        {
-            write_u16(data + i * 2, ctx->hist.frequency[i]);
-        }
-
-        ret = write_chunk(ctx, type_hist, data, length);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.trns)
-    {
-        if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_GRAYSCALE)
-        {
-            write_u16(data, ctx->trns.gray);
-
-            ret = write_chunk(ctx, type_trns, data, 2);
-        }
-        else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_TRUECOLOR)
-        {
-            write_u16(data,     ctx->trns.red);
-            write_u16(data + 2, ctx->trns.green);
-            write_u16(data + 4, ctx->trns.blue);
-
-            ret = write_chunk(ctx, type_trns, data, 6);
-        }
-        else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_INDEXED)
-        {
-            ret = write_chunk(ctx, type_trns, ctx->trns.type3_alpha, ctx->trns.n_type3_entries);
-        }
-
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.phys)
-    {
-        write_u32(data,     ctx->phys.ppu_x);
-        write_u32(data + 4, ctx->phys.ppu_y);
-        data[8] = ctx->phys.unit_specifier;
-
-        ret = write_chunk(ctx, type_phys, data, 9);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.splt)
-    {
-        const struct spng_splt *splt;
-        unsigned char *cdata = NULL;
-
-        uint32_t k;
-        for(i=0; i < ctx->n_splt; i++)
-        {
-            splt = &ctx->splt_list[i];
-
-            size_t name_len = strlen(splt->name);
-            length = name_len + 1;
-
-            if(splt->sample_depth == 8) length += splt->n_entries * 6 + 1;
-            else if(splt->sample_depth == 16) length += splt->n_entries * 10 + 1;
-
-            ret = write_header(ctx, type_splt, length, &cdata);
-            if(ret) return ret;
-
-            memcpy(cdata, splt->name, name_len + 1);
-            cdata += name_len + 2;
-            cdata[-1] = splt->sample_depth;
-
-            if(splt->sample_depth == 8)
-            {
-                for(k=0; k < splt->n_entries; k++)
-                {
-                    cdata[k * 6 + 0] = splt->entries[k].red;
-                    cdata[k * 6 + 1] = splt->entries[k].green;
-                    cdata[k * 6 + 2] = splt->entries[k].blue;
-                    cdata[k * 6 + 3] = splt->entries[k].alpha;
-                    write_u16(cdata + k * 6 + 4, splt->entries[k].frequency);
-                }
-            }
-            else if(splt->sample_depth == 16)
-            {
-                for(k=0; k < splt->n_entries; k++)
-                {
-                    write_u16(cdata + k * 10 + 0, splt->entries[k].red);
-                    write_u16(cdata + k * 10 + 2, splt->entries[k].green);
-                    write_u16(cdata + k * 10 + 4, splt->entries[k].blue);
-                    write_u16(cdata + k * 10 + 6, splt->entries[k].alpha);
-                    write_u16(cdata + k * 10 + 8, splt->entries[k].frequency);
-                }
-            }
-
-            ret = finish_chunk(ctx);
-            if(ret) return ret;
-        }
-    }
-
-    if(ctx->stored.time)
-    {
-        write_u16(data, ctx->time.year);
-        data[2] = ctx->time.month;
-        data[3] = ctx->time.day;
-        data[4] = ctx->time.hour;
-        data[5] = ctx->time.minute;
-        data[6] = ctx->time.second;
-
-        ret = write_chunk(ctx, type_time, data, 7);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.text)
-    {
-        unsigned char *cdata = NULL;
-        const struct spng_text2 *text;
-        const uint8_t *text_type_array[4] = { 0, type_text, type_ztxt, type_itxt };
-
-        for(i=0; i < ctx->n_text; i++)
-        {
-            text = &ctx->text_list[i];
-
-            const uint8_t *text_chunk_type = text_type_array[text->type];
-            Bytef *compressed_text = NULL;
-            size_t keyword_len = 0;
-            size_t language_tag_len = 0;
-            size_t translated_keyword_len = 0;
-            size_t compressed_length = 0;
-            size_t text_length = 0;
-
-            keyword_len = strlen(text->keyword);
-            text_length = strlen(text->text);
-
-            length = keyword_len + 1;
-
-            if(text->type == SPNG_ZTXT)
-            {
-                length += 1; /* compression method */
-            }
-            else if(text->type == SPNG_ITXT)
-            {
-                if(!text->language_tag || !text->translated_keyword) return SPNG_EINTERNAL;
-
-                language_tag_len = strlen(text->language_tag);
-                translated_keyword_len = strlen(text->translated_keyword);
-
-                length += language_tag_len;
-                if(length < language_tag_len) return SPNG_EOVERFLOW;
-
-                length += translated_keyword_len;
-                if(length < translated_keyword_len) return SPNG_EOVERFLOW;
-
-                length += 4; /* compression flag + method + nul for the two strings */
-                if(length < 4) return SPNG_EOVERFLOW;
-            }
-
-            if(text->compression_flag)
-            {
-                ret = spng__deflate_init(ctx, &ctx->text_options);
-                if(ret) return ret;
-
-                z_stream *zstream = &ctx->zstream;
-                uLongf dest_len = deflateBound(zstream, (uLong)text_length);
-
-                compressed_text = spng__malloc(ctx, dest_len);
-
-                if(compressed_text == NULL) return SPNG_EMEM;
-
-                zstream->next_in = (void*)text->text;
-                zstream->avail_in = (uInt)text_length;
-
-                zstream->next_out = compressed_text;
-                zstream->avail_out = dest_len;
-
-                ret = deflate(zstream, Z_FINISH);
-
-                if(ret != Z_STREAM_END)
-                {
-                    spng__free(ctx, compressed_text);
-                    return SPNG_EZLIB;
-                }
-
-                compressed_length = zstream->total_out;
-
-                length += compressed_length;
-                if(length < compressed_length) return SPNG_EOVERFLOW;
-            }
-            else
-            {
-                text_length = strlen(text->text);
-
-                length += text_length;
-                if(length < text_length) return SPNG_EOVERFLOW;
-            }
-
-            ret = write_header(ctx, text_chunk_type, length, &cdata);
-            if(ret)
-            {
-                spng__free(ctx, compressed_text);
-                return ret;
-            }
-
-            memcpy(cdata, text->keyword, keyword_len + 1);
-            cdata += keyword_len + 1;
-
-            if(text->type == SPNG_ITXT)
-            {
-                cdata[0] = text->compression_flag;
-                cdata[1] = 0; /* compression method */
-                cdata += 2;
-
-                memcpy(cdata, text->language_tag, language_tag_len + 1);
-                cdata += language_tag_len + 1;
-
-                memcpy(cdata, text->translated_keyword, translated_keyword_len + 1);
-                cdata += translated_keyword_len + 1;
-            }
-            else if(text->type == SPNG_ZTXT)
-            {
-                cdata[0] = 0; /* compression method */
-                cdata++;
-            }
-
-            if(text->compression_flag) memcpy(cdata, compressed_text, compressed_length);
-            else memcpy(cdata, text->text, text_length);
-
-            spng__free(ctx, compressed_text);
-
-            ret = finish_chunk(ctx);
-            if(ret) return ret;
-        }
-    }
-
-    if(ctx->stored.offs)
-    {
-        write_s32(data,     ctx->offs.x);
-        write_s32(data + 4, ctx->offs.y);
-        data[8] = ctx->offs.unit_specifier;
-
-        ret = write_chunk(ctx, type_offs, data, 9);
-        if(ret) return ret;
-    }
-
-    if(ctx->stored.exif)
-    {
-        ret = write_chunk(ctx, type_exif, ctx->exif.data, ctx->exif.length);
-        if(ret) return ret;
-    }
-
-    ret = write_unknown_chunks(ctx, SPNG_AFTER_PLTE);
-    if(ret) return ret;
-
-    return 0;
-}
-
-static int write_chunks_after_idat(spng_ctx *ctx)
-{
-    if(ctx == NULL) return SPNG_EINTERNAL;
-
-    int ret = write_unknown_chunks(ctx, SPNG_AFTER_IDAT);
-    if(ret) return ret;
-
-    return write_iend(ctx);
-}
-
-/* Compress and write scanline to IDAT stream */
-static int write_idat_bytes(spng_ctx *ctx, const void *scanline, size_t len, int flush)
-{
-    if(ctx == NULL || scanline == NULL) return SPNG_EINTERNAL;
-    if(len > UINT_MAX) return SPNG_EINTERNAL;
-
-    int ret = 0;
-    unsigned char *data = NULL;
-    z_stream *zstream = &ctx->zstream;
-    uint32_t idat_length = SPNG_WRITE_SIZE;
-
-    zstream->next_in = scanline;
-    zstream->avail_in = (uInt)len;
-
-    do
-    {
-        ret = deflate(zstream, flush);
-
-        if(zstream->avail_out == 0)
-        {
-            ret = finish_chunk(ctx);
-            if(ret) return encode_err(ctx, ret);
-
-            ret = write_header(ctx, type_idat, idat_length, &data);
-            if(ret) return encode_err(ctx, ret);
-
-            zstream->next_out = data;
-            zstream->avail_out = idat_length;
-        }
-
-    }while(zstream->avail_in);
-
-    if(ret != Z_OK) return SPNG_EZLIB;
-
-    return 0;
-}
-
-static int finish_idat(spng_ctx *ctx)
-{
-    int ret = 0;
-    unsigned char *data = NULL;
-    z_stream *zstream = &ctx->zstream;
-    uint32_t idat_length = SPNG_WRITE_SIZE;
-
-    while(ret != Z_STREAM_END)
-    {
-        ret = deflate(zstream, Z_FINISH);
-
-        if(ret)
-        {
-            if(ret == Z_STREAM_END) break;
-
-            if(ret != Z_BUF_ERROR) return SPNG_EZLIB;
-        }
-
-        if(zstream->avail_out == 0)
-        {
-            ret = finish_chunk(ctx);
-            if(ret) return encode_err(ctx, ret);
-
-            ret = write_header(ctx, type_idat, idat_length, &data);
-            if(ret) return encode_err(ctx, ret);
-
-            zstream->next_out = data;
-            zstream->avail_out = idat_length;
-        }
-    }
-
-    uint32_t trimmed_length = idat_length - zstream->avail_out;
-
-    ret = trim_chunk(ctx, trimmed_length);
-    if(ret) return ret;
-
-    return finish_chunk(ctx);
-}
-
-static int encode_scanline(spng_ctx *ctx, const void *scanline, size_t len)
-{
-    if(ctx == NULL || scanline == NULL) return SPNG_EINTERNAL;
-
-    int ret, pass = ctx->row_info.pass;
-    uint8_t filter = 0;
-    struct spng_row_info *ri = &ctx->row_info;
-    const struct spng_subimage *sub = ctx->subimage;
-    struct encode_flags f = ctx->encode_flags;
-    unsigned char *filtered_scanline = ctx->filtered_scanline;
-    size_t scanline_width = sub[pass].scanline_width;
-
-    if(len < scanline_width - 1) return SPNG_EINTERNAL;
-
-    /* encode_row() interlaces directly to ctx->scanline */
-    if(scanline != ctx->scanline) memcpy(ctx->scanline, scanline, scanline_width - 1);
-
-    if(f.to_bigendian) u16_row_to_bigendian(ctx->scanline, scanline_width - 1);
-    const int requires_previous = f.filter_choice & (SPNG_FILTER_CHOICE_UP | SPNG_FILTER_CHOICE_AVG | SPNG_FILTER_CHOICE_PAETH);
-
-    /* XXX: exclude 'requires_previous' filters by default for first scanline? */
-    if(!ri->scanline_idx && requires_previous)
-    {
-        /* prev_scanline is all zeros for the first scanline */
-        memset(ctx->prev_scanline, 0, scanline_width);
-    }
-
-    filter = get_best_filter(ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, f.filter_choice);
-
-    if(!filter) filtered_scanline = ctx->scanline;
-
-    filtered_scanline[-1] = filter;
-
-    if(filter)
-    {
-        ret = filter_scanline(filtered_scanline, ctx->prev_scanline, ctx->scanline, scanline_width, ctx->bytes_per_pixel, filter);
-        if(ret) return encode_err(ctx, ret);
-    }
-
-    ret = write_idat_bytes(ctx, filtered_scanline - 1, scanline_width, Z_NO_FLUSH);
-    if(ret) return encode_err(ctx, ret);
-
-    /* The previous scanline is always unfiltered */
-    void *t = ctx->prev_scanline;
-    ctx->prev_scanline = ctx->scanline;
-    ctx->scanline = t;
-
-    ret = update_row_info(ctx);
-
-    if(ret == SPNG_EOI)
-    {
-        int error = finish_idat(ctx);
-        if(error) encode_err(ctx, error);
-
-        if(f.finalize)
-        {
-            error = spng_encode_chunks(ctx);
-            if(error) return encode_err(ctx, error);
-        }
-    }
-
-    return ret;
-}
-
-static int encode_row(spng_ctx *ctx, const void *row, size_t len)
-{
-    if(ctx == NULL || row == NULL) return SPNG_EINTERNAL;
-
-    const int pass = ctx->row_info.pass;
-
-    if(!ctx->ihdr.interlace_method || pass == 6) return encode_scanline(ctx, row, len);
-
-    uint32_t k;
-    const unsigned pixel_size = ctx->pixel_size;
-    const unsigned bit_depth = ctx->ihdr.bit_depth;
-
-    if(bit_depth < 8)
-    {
-        const unsigned samples_per_byte = 8 / bit_depth;
-        const uint8_t mask = (1 << bit_depth) - 1;
-        const unsigned initial_shift = 8 - bit_depth;
-        unsigned shift_amount = initial_shift;
-
-        unsigned char *scanline = ctx->scanline;
-        const unsigned char *row_uc = row;
-        uint8_t sample;
-
-        memset(scanline, 0, ctx->subimage[pass].scanline_width);
-
-        for(k=0; k < ctx->subimage[pass].width; k++)
-        {
-            size_t ioffset = adam7_x_start[pass] + k * adam7_x_delta[pass];
-
-            sample = row_uc[ioffset / samples_per_byte];
-
-            sample = sample >> (initial_shift - ioffset * bit_depth % 8);
-            sample = sample & mask;
-            sample = sample << shift_amount;
-
-            scanline[0] |= sample;
-
-            shift_amount -= bit_depth;
-
-            if(shift_amount > 7)
-            {
-                shift_amount = initial_shift;
-                scanline++;
-            }
-        }
-
-        return encode_scanline(ctx, ctx->scanline, len);
-    }
-
-    for(k=0; k < ctx->subimage[pass].width; k++)
-    {
-        size_t ioffset = (adam7_x_start[pass] + (size_t) k * adam7_x_delta[pass]) * pixel_size;
-
-        memcpy(ctx->scanline + k * pixel_size, (unsigned char*)row + ioffset, pixel_size);
-    }
-
-    return encode_scanline(ctx, ctx->scanline, len);
-}
-
-int spng_encode_scanline(spng_ctx *ctx, const void *scanline, size_t len)
-{
-    if(ctx == NULL || scanline == NULL) return SPNG_EINVAL;
-    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
-    if(len < (ctx->subimage[ctx->row_info.pass].scanline_width -1) ) return SPNG_EBUFSIZ;
-
-    return encode_scanline(ctx, scanline, len);
-}
-
-int spng_encode_row(spng_ctx *ctx, const void *row, size_t len)
-{
-    if(ctx == NULL || row == NULL) return SPNG_EINVAL;
-    if(ctx->state >= SPNG_STATE_EOI) return SPNG_EOI;
-    if(len < ctx->image_width) return SPNG_EBUFSIZ;
-
-    return encode_row(ctx, row, len);
-}
-
-int spng_encode_chunks(spng_ctx *ctx)
-{
-    if(ctx == NULL) return 1;
-    if(!ctx->state) return SPNG_EBADSTATE;
-    if(ctx->state < SPNG_STATE_OUTPUT) return SPNG_ENODST;
-    if(!ctx->encode_only) return SPNG_ECTXTYPE;
-
-    int ret = 0;
-
-    if(ctx->state < SPNG_STATE_FIRST_IDAT)
-    {
-        if(!ctx->stored.ihdr) return SPNG_ENOIHDR;
-
-        ret = write_chunks_before_idat(ctx);
-        if(ret) return encode_err(ctx, ret);
-
-        ctx->state = SPNG_STATE_FIRST_IDAT;
-    }
-    else if(ctx->state == SPNG_STATE_FIRST_IDAT)
-    {
-        return 0;
-    }
-    else if(ctx->state == SPNG_STATE_EOI)
-    {
-        ret = write_chunks_after_idat(ctx);
-        if(ret) return encode_err(ctx, ret);
-
-        ctx->state = SPNG_STATE_IEND;
-    }
-    else return SPNG_EOPSTATE;
-
-    return 0;
-}
-
-int spng_encode_image(spng_ctx *ctx, const void *img, size_t len, int fmt, int flags)
-{
-    if(ctx == NULL) return 1;
-    if(!ctx->state) return SPNG_EBADSTATE;
-    if(!ctx->encode_only) return SPNG_ECTXTYPE;
-    if(!ctx->stored.ihdr) return SPNG_ENOIHDR;
-    if( !(fmt == SPNG_FMT_PNG || fmt == SPNG_FMT_RAW) ) return SPNG_EFMT;
-
-    int ret = 0;
-    const struct spng_ihdr *ihdr = &ctx->ihdr;
-    struct encode_flags *encode_flags = &ctx->encode_flags;
-
-    if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED && !ctx->stored.plte) return SPNG_ENOPLTE;
-
-    ret = calculate_image_width(ihdr, fmt, &ctx->image_width);
-    if(ret) return encode_err(ctx, ret);
-
-    if(ctx->image_width > SIZE_MAX / ihdr->height) ctx->image_size = 0; /* overflow */
-    else ctx->image_size = ctx->image_width * ihdr->height;
-
-    if( !(flags & SPNG_ENCODE_PROGRESSIVE) )
-    {
-        if(img == NULL) return 1;
-        if(!ctx->image_size) return SPNG_EOVERFLOW;
-        if(len != ctx->image_size) return SPNG_EBUFSIZ;
-    }
-
-    ret = spng_encode_chunks(ctx);
-    if(ret) return encode_err(ctx, ret);
-
-    ret = calculate_subimages(ctx);
-    if(ret) return encode_err(ctx, ret);
-
-    if(ihdr->bit_depth < 8) ctx->bytes_per_pixel = 1;
-    else ctx->bytes_per_pixel = num_channels(ihdr) * (ihdr->bit_depth / 8);
-
-    if(spng__optimize(SPNG_FILTER_CHOICE))
-    {
-        /* Filtering would make no difference */
-        if(!ctx->image_options.compression_level)
-        {
-            encode_flags->filter_choice = SPNG_DISABLE_FILTERING;
-        }
-
-        /* Palette indices and low bit-depth images do not benefit from filtering */
-        if(ihdr->color_type == SPNG_COLOR_TYPE_INDEXED || ihdr->bit_depth < 8)
-        {
-            encode_flags->filter_choice = SPNG_DISABLE_FILTERING;
-        }
-    }
-
-    /* This is technically the same as disabling filtering */
-    if(encode_flags->filter_choice == SPNG_FILTER_CHOICE_NONE)
-    {
-        encode_flags->filter_choice = SPNG_DISABLE_FILTERING;
-    }
-
-    if(!encode_flags->filter_choice && spng__optimize(SPNG_IMG_COMPRESSION_STRATEGY))
-    {
-        ctx->image_options.strategy = Z_DEFAULT_STRATEGY;
-    }
-
-    ret = spng__deflate_init(ctx, &ctx->image_options);
-    if(ret) return encode_err(ctx, ret);
-
-    size_t scanline_buf_size = ctx->subimage[ctx->widest_pass].scanline_width;
-
-    scanline_buf_size += 32;
-
-    if(scanline_buf_size < 32) return SPNG_EOVERFLOW;
-
-    ctx->scanline_buf = spng__malloc(ctx, scanline_buf_size);
-    ctx->prev_scanline_buf = spng__malloc(ctx, scanline_buf_size);
-
-    if(ctx->scanline_buf == NULL || ctx->prev_scanline_buf == NULL) return encode_err(ctx, SPNG_EMEM);
-
-    /* Maintain alignment for pixels, filter at [-1] */
-    ctx->scanline = ctx->scanline_buf + 16;
-    ctx->prev_scanline = ctx->prev_scanline_buf + 16;
-
-    if(encode_flags->filter_choice)
-    {
-        ctx->filtered_scanline_buf = spng__malloc(ctx, scanline_buf_size);
-        if(ctx->filtered_scanline_buf == NULL) return encode_err(ctx, SPNG_EMEM);
-
-        ctx->filtered_scanline = ctx->filtered_scanline_buf + 16;
-    }
-
-    struct spng_subimage *sub = ctx->subimage;
-    struct spng_row_info *ri = &ctx->row_info;
-
-    ctx->fmt = fmt;
-
-    z_stream *zstream = &ctx->zstream;
-    zstream->avail_out = SPNG_WRITE_SIZE;
-
-    ret = write_header(ctx, type_idat, zstream->avail_out, &zstream->next_out);
-    if(ret) return encode_err(ctx, ret);
-
-    if(ihdr->interlace_method) encode_flags->interlace = 1;
-
-    if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW) ) encode_flags->same_layout = 1;
-
-    if(ihdr->bit_depth == 16 && fmt != SPNG_FMT_RAW) encode_flags->to_bigendian = 1;
-
-    if(flags & SPNG_ENCODE_FINALIZE) encode_flags->finalize = 1;
-
-    while(!sub[ri->pass].width || !sub[ri->pass].height) ri->pass++;
-
-    if(encode_flags->interlace) ri->row_num = adam7_y_start[ri->pass];
-
-    ctx->pixel_size = 4; /* SPNG_FMT_RGBA8 */
-
-    if(fmt == SPNG_FMT_RGBA16) ctx->pixel_size = 8;
-    else if(fmt == SPNG_FMT_RGB8) ctx->pixel_size = 3;
-    else if(fmt == SPNG_FMT_G8) ctx->pixel_size = 1;
-    else if(fmt == SPNG_FMT_GA8) ctx->pixel_size = 2;
-    else if(fmt & (SPNG_FMT_PNG | SPNG_FMT_RAW)) ctx->pixel_size = ctx->bytes_per_pixel;
-
-    ctx->state = SPNG_STATE_ENCODE_INIT;
-
-    if(flags & SPNG_ENCODE_PROGRESSIVE)
-    {
-        encode_flags->progressive = 1;
-
-        return 0;
-    }
-
-    do
-    {
-        size_t ioffset = ri->row_num * ctx->image_width;
-
-        ret = encode_row(ctx, (unsigned char*)img + ioffset, ctx->image_width);
-
-    }while(!ret);
-
-    if(ret != SPNG_EOI) return encode_err(ctx, ret);
-
-    return 0;
-}
-
-spng_ctx *spng_ctx_new(int flags)
-{
-    struct spng_alloc alloc =
-    {
-        .malloc_fn = malloc,
-        .realloc_fn = realloc,
-        .calloc_fn = calloc,
-        .free_fn = free
-    };
-
-    return spng_ctx_new2(&alloc, flags);
-}
-
-spng_ctx *spng_ctx_new2(struct spng_alloc *alloc, int flags)
-{
-    if(alloc == NULL) return NULL;
-    if(flags != (flags & SPNG__CTX_FLAGS_ALL)) return NULL;
-
-    if(alloc->malloc_fn == NULL) return NULL;
-    if(alloc->realloc_fn == NULL) return NULL;
-    if(alloc->calloc_fn == NULL) return NULL;
-    if(alloc->free_fn == NULL) return NULL;
-
-    spng_ctx *ctx = alloc->calloc_fn(1, sizeof(spng_ctx));
-    if(ctx == NULL) return NULL;
-
-    ctx->alloc = *alloc;
-
-    ctx->max_width = spng_u32max;
-    ctx->max_height = spng_u32max;
-
-    ctx->max_chunk_size = spng_u32max;
-    ctx->chunk_cache_limit = SIZE_MAX;
-    ctx->chunk_count_limit = SPNG_MAX_CHUNK_COUNT;
-
-    ctx->state = SPNG_STATE_INIT;
-
-    ctx->crc_action_critical = SPNG_CRC_ERROR;
-    ctx->crc_action_ancillary = SPNG_CRC_DISCARD;
-
-    const struct spng__zlib_options image_defaults =
-    {
-        .compression_level = Z_DEFAULT_COMPRESSION,
-        .window_bits = 15,
-        .mem_level = 8,
-        .strategy = Z_FILTERED,
-        .data_type = 0 /* Z_BINARY */
-    };
-
-    const struct spng__zlib_options text_defaults =
-    {
-        .compression_level = Z_DEFAULT_COMPRESSION,
-        .window_bits = 15,
-        .mem_level = 8,
-        .strategy = Z_DEFAULT_STRATEGY,
-        .data_type = 1 /* Z_TEXT */
-    };
-
-    ctx->image_options = image_defaults;
-    ctx->text_options = text_defaults;
-
-    ctx->optimize_option = ~0;
-    ctx->encode_flags.filter_choice = SPNG_FILTER_CHOICE_ALL;
-
-    ctx->flags = flags;
-
-    if(flags & SPNG_CTX_ENCODER) ctx->encode_only = 1;
-
-    return ctx;
-}
-
-void spng_ctx_free(spng_ctx *ctx)
-{
-    if(ctx == NULL) return;
-
-    if(ctx->streaming && ctx->stream_buf != NULL) spng__free(ctx, ctx->stream_buf);
-
-    if(!ctx->user.exif) spng__free(ctx, ctx->exif.data);
-
-    if(!ctx->user.iccp) spng__free(ctx, ctx->iccp.profile);
-
-    uint32_t i;
-
-    if(ctx->splt_list != NULL && !ctx->user.splt)
-    {
-        for(i=0; i < ctx->n_splt; i++)
-        {
-            spng__free(ctx, ctx->splt_list[i].entries);
-        }
-        spng__free(ctx, ctx->splt_list);
-    }
-
-    if(ctx->text_list != NULL)
-    {
-        for(i=0; i< ctx->n_text; i++)
-        {
-            if(ctx->user.text) break;
-
-            spng__free(ctx, ctx->text_list[i].keyword);
-            if(ctx->text_list[i].compression_flag) spng__free(ctx, ctx->text_list[i].text);
-        }
-        spng__free(ctx, ctx->text_list);
-    }
-
-    if(ctx->chunk_list != NULL && !ctx->user.unknown)
-    {
-        for(i=0; i< ctx->n_chunks; i++)
-        {
-            spng__free(ctx, ctx->chunk_list[i].data);
-        }
-        spng__free(ctx, ctx->chunk_list);
-    }
-
-    if(ctx->deflate) deflateEnd(&ctx->zstream);
-    else inflateEnd(&ctx->zstream);
-
-    if(!ctx->user_owns_out_png) spng__free(ctx, ctx->out_png);
-
-    spng__free(ctx, ctx->gamma_lut16);
-
-    spng__free(ctx, ctx->row_buf);
-    spng__free(ctx, ctx->scanline_buf);
-    spng__free(ctx, ctx->prev_scanline_buf);
-    spng__free(ctx, ctx->filtered_scanline_buf);
-
-    spng_free_fn *free_fn = ctx->alloc.free_fn;
-
-    memset(ctx, 0, sizeof(spng_ctx));
-
-    free_fn(ctx);
-}
-
-static int buffer_read_fn(spng_ctx *ctx, void *user, void *data, size_t n)
-{
-    if(n > ctx->bytes_left) return SPNG_IO_EOF;
-
-    (void)user;
-    (void)data;
-    ctx->data = ctx->data + ctx->last_read_size;
-
-    ctx->last_read_size = n;
-    ctx->bytes_left -= n;
-
-    return 0;
-}
-
-static int file_read_fn(spng_ctx *ctx, void *user, void *data, size_t n)
-{
-    FILE *file = user;
-    (void)ctx;
-
-    if(fread(data, n, 1, file) != 1)
-    {
-        if(feof(file)) return SPNG_IO_EOF;
-        else return SPNG_IO_ERROR;
-    }
-
-    return 0;
-}
-
-static int file_write_fn(spng_ctx *ctx, void *user, void *data, size_t n)
-{
-    FILE *file = user;
-    (void)ctx;
-
-    if(fwrite(data, n, 1, file) != 1) return SPNG_IO_ERROR;
-
-    return 0;
-}
-
-int spng_set_png_buffer(spng_ctx *ctx, const void *buf, size_t size)
-{
-    if(ctx == NULL || buf == NULL) return 1;
-    if(!ctx->state) return SPNG_EBADSTATE;
-    if(ctx->encode_only) return SPNG_ECTXTYPE; /* not supported */
-
-    if(ctx->data != NULL) return SPNG_EBUF_SET;
-
-    ctx->data = buf;
-    ctx->png_base = buf;
-    ctx->data_size = size;
-    ctx->bytes_left = size;
-
-    ctx->read_fn = buffer_read_fn;
-
-    ctx->state = SPNG_STATE_INPUT;
-
-    return 0;
-}
-
-int spng_set_png_stream(spng_ctx *ctx, spng_rw_fn *rw_func, void *user)
-{
-    if(ctx == NULL || rw_func == NULL) return 1;
-    if(!ctx->state) return SPNG_EBADSTATE;
-
-    /* SPNG_STATE_OUTPUT shares the same value */
-    if(ctx->state >= SPNG_STATE_INPUT) return SPNG_EBUF_SET;
-
-    if(ctx->encode_only)
-    {
-        if(ctx->out_png != NULL) return SPNG_EBUF_SET;
-
-        ctx->write_fn = rw_func;
-        ctx->write_ptr = ctx->stream_buf;
-
-        ctx->state = SPNG_STATE_OUTPUT;
-    }
-    else
-    {
-        ctx->stream_buf = spng__malloc(ctx, SPNG_READ_SIZE);
-        if(ctx->stream_buf == NULL) return SPNG_EMEM;
-
-        ctx->read_fn = rw_func;
-        ctx->data = ctx->stream_buf;
-        ctx->data_size = SPNG_READ_SIZE;
-
-        ctx->state = SPNG_STATE_INPUT;
-    }
-
-    ctx->stream_user_ptr = user;
-
-    ctx->streaming = 1;
-
-    return 0;
-}
-
-int spng_set_png_file(spng_ctx *ctx, FILE *file)
-{
-    if(file == NULL) return 1;
-
-    if(ctx->encode_only) return spng_set_png_stream(ctx, file_write_fn, file);
-
-    return spng_set_png_stream(ctx, file_read_fn, file);
-}
-
-void *spng_get_png_buffer(spng_ctx *ctx, size_t *len, int *error)
-{
-    int tmp = 0;
-    error = error ? error : &tmp;
-    *error = 0;
-
-    if(ctx == NULL || !len) *error = SPNG_EINVAL;
-
-    if(*error) return NULL;
-
-    if(!ctx->encode_only) *error = SPNG_ECTXTYPE;
-    else if(!ctx->state) *error = SPNG_EBADSTATE;
-    else if(!ctx->internal_buffer) *error = SPNG_EOPSTATE;
-    else if(ctx->state < SPNG_STATE_EOI) *error = SPNG_EOPSTATE;
-    else if(ctx->state != SPNG_STATE_IEND) *error = SPNG_ENOTFINAL;
-
-    if(*error) return NULL;
-
-    ctx->user_owns_out_png = 1;
-
-    *len = ctx->bytes_encoded;
-
-    return ctx->out_png;
-}
-
-int spng_set_image_limits(spng_ctx *ctx, uint32_t width, uint32_t height)
-{
-    if(ctx == NULL) return 1;
-
-    if(width > spng_u32max || height > spng_u32max) return 1;
-
-    ctx->max_width = width;
-    ctx->max_height = height;
-
-    return 0;
-}
-
-int spng_get_image_limits(spng_ctx *ctx, uint32_t *width, uint32_t *height)
-{
-    if(ctx == NULL || width == NULL || height == NULL) return 1;
-
-    *width = ctx->max_width;
-    *height = ctx->max_height;
-
-    return 0;
-}
-
-int spng_set_chunk_limits(spng_ctx *ctx, size_t chunk_size, size_t cache_limit)
-{
-    if(ctx == NULL || chunk_size > spng_u32max || chunk_size > cache_limit) return 1;
-
-    ctx->max_chunk_size = chunk_size;
-
-    ctx->chunk_cache_limit = cache_limit;
-
-    return 0;
-}
-
-int spng_get_chunk_limits(spng_ctx *ctx, size_t *chunk_size, size_t *cache_limit)
-{
-    if(ctx == NULL || chunk_size == NULL || cache_limit == NULL) return 1;
-
-    *chunk_size = ctx->max_chunk_size;
-
-    *cache_limit = ctx->chunk_cache_limit;
-
-    return 0;
-}
-
-int spng_set_crc_action(spng_ctx *ctx, int critical, int ancillary)
-{
-    if(ctx == NULL) return 1;
-    if(ctx->encode_only) return SPNG_ECTXTYPE;
-
-    if(critical > 2 || critical < 0) return 1;
-    if(ancillary > 2 || ancillary < 0) return 1;
-
-    if(critical == SPNG_CRC_DISCARD) return 1;
-
-    ctx->crc_action_critical = critical;
-    ctx->crc_action_ancillary = ancillary;
-
-    return 0;
-}
-
-int spng_set_option(spng_ctx *ctx, enum spng_option option, int value)
-{
-    if(ctx == NULL) return 1;
-    if(!ctx->state) return SPNG_EBADSTATE;
-
-    switch(option)
-    {
-        case SPNG_KEEP_UNKNOWN_CHUNKS:
-        {
-            ctx->keep_unknown = value ? 1 : 0;
-            break;
-        }
-        case SPNG_IMG_COMPRESSION_LEVEL:
-        {
-            ctx->image_options.compression_level = value;
-            break;
-        }
-        case SPNG_IMG_WINDOW_BITS:
-        {
-            ctx->image_options.window_bits = value;
-            break;
-        }
-        case SPNG_IMG_MEM_LEVEL:
-        {
-            ctx->image_options.mem_level = value;
-            break;
-        }
-        case SPNG_IMG_COMPRESSION_STRATEGY:
-        {
-            ctx->image_options.strategy = value;
-            break;
-        }
-        case SPNG_TEXT_COMPRESSION_LEVEL:
-        {
-            ctx->text_options.compression_level = value;
-            break;
-        }
-        case SPNG_TEXT_WINDOW_BITS:
-        {
-            ctx->text_options.window_bits = value;
-            break;
-        }
-        case SPNG_TEXT_MEM_LEVEL:
-        {
-            ctx->text_options.mem_level = value;
-            break;
-        }
-        case SPNG_TEXT_COMPRESSION_STRATEGY:
-        {
-            ctx->text_options.strategy = value;
-            break;
-        }
-        case SPNG_FILTER_CHOICE:
-        {
-            if(value & ~SPNG_FILTER_CHOICE_ALL) return 1;
-            ctx->encode_flags.filter_choice = value;
-            break;
-        }
-        case SPNG_CHUNK_COUNT_LIMIT:
-        {
-            if(value < 0) return 1;
-            if(value > (int)ctx->chunk_count_total) return 1;
-            ctx->chunk_count_limit = value;
-            break;
-        }
-        case SPNG_ENCODE_TO_BUFFER:
-        {
-            if(value < 0) return 1;
-            if(!ctx->encode_only) return SPNG_ECTXTYPE;
-            if(ctx->state >= SPNG_STATE_OUTPUT) return SPNG_EOPSTATE;
-
-            if(!value) break;
-
-            ctx->internal_buffer = 1;
-            ctx->state = SPNG_STATE_OUTPUT;
-
-            break;
-        }
-        default: return 1;
-    }
-
-    /* Option can no longer be overriden by the library */
-    if(option < 32) ctx->optimize_option &= ~(1 << option);
-
-    return 0;
-}
-
-int spng_get_option(spng_ctx *ctx, enum spng_option option, int *value)
-{
-    if(ctx == NULL || value == NULL) return 1;
-    if(!ctx->state) return SPNG_EBADSTATE;
-
-    switch(option)
-    {
-        case SPNG_KEEP_UNKNOWN_CHUNKS:
-        {
-            *value = ctx->keep_unknown;
-            break;
-        }
-        case SPNG_IMG_COMPRESSION_LEVEL:
-        {
-            *value = ctx->image_options.compression_level;
-            break;
-        }
-            case SPNG_IMG_WINDOW_BITS:
-        {
-            *value = ctx->image_options.window_bits;
-            break;
-        }
-        case SPNG_IMG_MEM_LEVEL:
-        {
-            *value = ctx->image_options.mem_level;
-            break;
-        }
-        case SPNG_IMG_COMPRESSION_STRATEGY:
-        {
-            *value = ctx->image_options.strategy;
-            break;
-        }
-        case SPNG_TEXT_COMPRESSION_LEVEL:
-        {
-            *value = ctx->text_options.compression_level;
-            break;
-        }
-            case SPNG_TEXT_WINDOW_BITS:
-        {
-            *value = ctx->text_options.window_bits;
-            break;
-        }
-        case SPNG_TEXT_MEM_LEVEL:
-        {
-            *value = ctx->text_options.mem_level;
-            break;
-        }
-        case SPNG_TEXT_COMPRESSION_STRATEGY:
-        {
-            *value = ctx->text_options.strategy;
-            break;
-        }
-        case SPNG_FILTER_CHOICE:
-        {
-            *value = ctx->encode_flags.filter_choice;
-            break;
-        }
-        case SPNG_CHUNK_COUNT_LIMIT:
-        {
-            *value = ctx->chunk_count_limit;
-            break;
-        }
-        case SPNG_ENCODE_TO_BUFFER:
-        {
-            if(ctx->internal_buffer) *value = 1;
-            else *value = 0;
-
-            break;
-        }
-        default: return 1;
-    }
-
-    return 0;
-}
-
-int spng_decoded_image_size(spng_ctx *ctx, int fmt, size_t *len)
-{
-    if(ctx == NULL || len == NULL) return 1;
-
-    int ret = read_chunks(ctx, 1);
-    if(ret) return ret;
-
-    ret = check_decode_fmt(&ctx->ihdr, fmt);
-    if(ret) return ret;
-
-    return calculate_image_size(&ctx->ihdr, fmt, len);
-}
-
-int spng_get_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr)
-{
-    if(ctx == NULL) return 1;
-    int ret = read_chunks(ctx, 1);
-    if(ret) return ret;
-    if(ihdr == NULL) return 1;
-
-    *ihdr = ctx->ihdr;
-
-    return 0;
-}
-
-int spng_get_plte(spng_ctx *ctx, struct spng_plte *plte)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(plte);
-
-    *plte = ctx->plte;
-
-    return 0;
-}
-
-int spng_get_trns(spng_ctx *ctx, struct spng_trns *trns)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(trns);
-
-    *trns = ctx->trns;
-
-    return 0;
-}
-
-int spng_get_chrm(spng_ctx *ctx, struct spng_chrm *chrm)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(chrm);
-
-    chrm->white_point_x = (double)ctx->chrm_int.white_point_x / 100000.0;
-    chrm->white_point_y = (double)ctx->chrm_int.white_point_y / 100000.0;
-    chrm->red_x = (double)ctx->chrm_int.red_x / 100000.0;
-    chrm->red_y = (double)ctx->chrm_int.red_y / 100000.0;
-    chrm->blue_y = (double)ctx->chrm_int.blue_y / 100000.0;
-    chrm->blue_x = (double)ctx->chrm_int.blue_x / 100000.0;
-    chrm->green_x = (double)ctx->chrm_int.green_x / 100000.0;
-    chrm->green_y = (double)ctx->chrm_int.green_y / 100000.0;
-
-    return 0;
-}
-
-int spng_get_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(chrm);
-
-    *chrm = ctx->chrm_int;
-
-    return 0;
-}
-
-int spng_get_gama(spng_ctx *ctx, double *gamma)
-{
-    double *gama = gamma;
-    SPNG_GET_CHUNK_BOILERPLATE(gama);
-
-    *gama = (double)ctx->gama / 100000.0;
-
-    return 0;
-}
-
-int spng_get_gama_int(spng_ctx *ctx, uint32_t *gama_int)
-{
-    uint32_t *gama = gama_int;
-    SPNG_GET_CHUNK_BOILERPLATE(gama);
-
-    *gama_int = ctx->gama;
-
-    return 0;
-}
-
-int spng_get_iccp(spng_ctx *ctx, struct spng_iccp *iccp)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(iccp);
-
-    *iccp = ctx->iccp;
-
-    return 0;
-}
-
-int spng_get_sbit(spng_ctx *ctx, struct spng_sbit *sbit)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(sbit);
-
-    *sbit = ctx->sbit;
-
-    return 0;
-}
-
-int spng_get_srgb(spng_ctx *ctx, uint8_t *rendering_intent)
-{
-    uint8_t *srgb = rendering_intent;
-    SPNG_GET_CHUNK_BOILERPLATE(srgb);
-
-    *srgb = ctx->srgb_rendering_intent;
-
-    return 0;
-}
-
-int spng_get_text(spng_ctx *ctx, struct spng_text *text, uint32_t *n_text)
-{
-    if(ctx == NULL) return 1;
-    int ret = read_chunks(ctx, 0);
-    if(ret) return ret;
-    if(!ctx->stored.text) return SPNG_ECHUNKAVAIL;
-    if(n_text == NULL) return 1;
-
-    if(text == NULL)
-    {
-        *n_text = ctx->n_text;
-        return 0;
-    }
-
-    if(*n_text < ctx->n_text) return 1;
-
-    uint32_t i;
-    for(i=0; i< ctx->n_text; i++)
-    {
-        text[i].type = ctx->text_list[i].type;
-        memcpy(&text[i].keyword, ctx->text_list[i].keyword, strlen(ctx->text_list[i].keyword) + 1);
-        text[i].compression_method = 0;
-        text[i].compression_flag = ctx->text_list[i].compression_flag;
-        text[i].language_tag = ctx->text_list[i].language_tag;
-        text[i].translated_keyword = ctx->text_list[i].translated_keyword;
-        text[i].length = ctx->text_list[i].text_length;
-        text[i].text = ctx->text_list[i].text;
-    }
-
-    return ret;
-}
-
-int spng_get_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(bkgd);
-
-    *bkgd = ctx->bkgd;
-
-    return 0;
-}
-
-int spng_get_hist(spng_ctx *ctx, struct spng_hist *hist)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(hist);
-
-    *hist = ctx->hist;
-
-    return 0;
-}
-
-int spng_get_phys(spng_ctx *ctx, struct spng_phys *phys)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(phys);
-
-    *phys = ctx->phys;
-
-    return 0;
-}
-
-int spng_get_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t *n_splt)
-{
-    if(ctx == NULL) return 1;
-    int ret = read_chunks(ctx, 0);
-    if(ret) return ret;
-    if(!ctx->stored.splt) return SPNG_ECHUNKAVAIL;
-    if(n_splt == NULL) return 1;
-
-    if(splt == NULL)
-    {
-        *n_splt = ctx->n_splt;
-        return 0;
-    }
-
-    if(*n_splt < ctx->n_splt) return 1;
-
-    memcpy(splt, ctx->splt_list, ctx->n_splt * sizeof(struct spng_splt));
-
-    return 0;
-}
-
-int spng_get_time(spng_ctx *ctx, struct spng_time *time)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(time);
-
-    *time = ctx->time;
-
-    return 0;
-}
-
-int spng_get_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t *n_chunks)
-{
-    if(ctx == NULL) return 1;
-    int ret = read_chunks(ctx, 0);
-    if(ret) return ret;
-    if(!ctx->stored.unknown) return SPNG_ECHUNKAVAIL;
-    if(n_chunks == NULL) return 1;
-
-    if(chunks == NULL)
-    {
-        *n_chunks = ctx->n_chunks;
-        return 0;
-    }
-
-    if(*n_chunks < ctx->n_chunks) return 1;
-
-    memcpy(chunks, ctx->chunk_list, sizeof(struct spng_unknown_chunk));
-
-    return 0;
-}
-
-int spng_get_offs(spng_ctx *ctx, struct spng_offs *offs)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(offs);
-
-    *offs = ctx->offs;
-
-    return 0;
-}
-
-int spng_get_exif(spng_ctx *ctx, struct spng_exif *exif)
-{
-    SPNG_GET_CHUNK_BOILERPLATE(exif);
-
-    *exif = ctx->exif;
-
-    return 0;
-}
-
-int spng_set_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(ihdr);
-
-    if(ctx->stored.ihdr) return 1;
-
-    ret = check_ihdr(ihdr, ctx->max_width, ctx->max_height);
-    if(ret) return ret;
-
-    ctx->ihdr = *ihdr;
-
-    ctx->stored.ihdr = 1;
-    ctx->user.ihdr = 1;
-
-    return 0;
-}
-
-int spng_set_plte(spng_ctx *ctx, struct spng_plte *plte)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(plte);
-
-    if(!ctx->stored.ihdr) return 1;
-
-    if(check_plte(plte, &ctx->ihdr)) return 1;
-
-    ctx->plte.n_entries = plte->n_entries;
-
-    memcpy(ctx->plte.entries, plte->entries, plte->n_entries * sizeof(struct spng_plte_entry));
-
-    ctx->stored.plte = 1;
-    ctx->user.plte = 1;
-
-    return 0;
-}
-
-int spng_set_trns(spng_ctx *ctx, struct spng_trns *trns)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(trns);
-
-    if(!ctx->stored.ihdr) return SPNG_ENOIHDR;
-
-    if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_GRAYSCALE)
-    {
-        ctx->trns.gray = trns->gray;
-    }
-    else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_TRUECOLOR)
-    {
-        ctx->trns.red = trns->red;
-        ctx->trns.green = trns->green;
-        ctx->trns.blue = trns->blue;
-    }
-    else if(ctx->ihdr.color_type == SPNG_COLOR_TYPE_INDEXED)
-    {
-        if(!ctx->stored.plte) return SPNG_ETRNS_NO_PLTE;
-        if(trns->n_type3_entries > ctx->plte.n_entries) return 1;
-
-        ctx->trns.n_type3_entries = trns->n_type3_entries;
-        memcpy(ctx->trns.type3_alpha, trns->type3_alpha, trns->n_type3_entries);
-    }
-    else return SPNG_ETRNS_COLOR_TYPE;
-
-    ctx->stored.trns = 1;
-    ctx->user.trns = 1;
-
-    return 0;
-}
-
-int spng_set_chrm(spng_ctx *ctx, struct spng_chrm *chrm)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(chrm);
-
-    struct spng_chrm_int chrm_int;
-
-    chrm_int.white_point_x = (uint32_t)(chrm->white_point_x * 100000.0);
-    chrm_int.white_point_y = (uint32_t)(chrm->white_point_y * 100000.0);
-    chrm_int.red_x = (uint32_t)(chrm->red_x * 100000.0);
-    chrm_int.red_y = (uint32_t)(chrm->red_y * 100000.0);
-    chrm_int.green_x = (uint32_t)(chrm->green_x * 100000.0);
-    chrm_int.green_y = (uint32_t)(chrm->green_y * 100000.0);
-    chrm_int.blue_x = (uint32_t)(chrm->blue_x * 100000.0);
-    chrm_int.blue_y = (uint32_t)(chrm->blue_y * 100000.0);
-
-    if(check_chrm_int(&chrm_int)) return SPNG_ECHRM;
-
-    ctx->chrm_int = chrm_int;
-
-    ctx->stored.chrm = 1;
-    ctx->user.chrm = 1;
-
-    return 0;
-}
-
-int spng_set_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(chrm_int);
-
-    if(check_chrm_int(chrm_int)) return SPNG_ECHRM;
-
-    ctx->chrm_int = *chrm_int;
-
-    ctx->stored.chrm = 1;
-    ctx->user.chrm = 1;
-
-    return 0;
-}
-
-int spng_set_gama(spng_ctx *ctx, double gamma)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(ctx);
-
-    uint32_t gama = gamma * 100000.0;
-
-    if(!gama) return 1;
-    if(gama > spng_u32max) return 1;
-
-    ctx->gama = gama;
-
-    ctx->stored.gama = 1;
-    ctx->user.gama = 1;
-
-    return 0;
-}
-
-int spng_set_gama_int(spng_ctx *ctx, uint32_t gamma)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(ctx);
-
-    if(!gamma) return 1;
-    if(gamma > spng_u32max) return 1;
-
-    ctx->gama = gamma;
-
-    ctx->stored.gama = 1;
-    ctx->user.gama = 1;
-
-    return 0;
-}
-
-int spng_set_iccp(spng_ctx *ctx, struct spng_iccp *iccp)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(iccp);
-
-    if(check_png_keyword(iccp->profile_name)) return SPNG_EICCP_NAME;
-    if(!iccp->profile_len) return SPNG_ECHUNK_SIZE;
-    if(iccp->profile_len > spng_u32max) return SPNG_ECHUNK_STDLEN;
-
-    if(ctx->iccp.profile && !ctx->user.iccp) spng__free(ctx, ctx->iccp.profile);
-
-    ctx->iccp = *iccp;
-
-    ctx->stored.iccp = 1;
-    ctx->user.iccp = 1;
-
-    return 0;
-}
-
-int spng_set_sbit(spng_ctx *ctx, struct spng_sbit *sbit)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(sbit);
-
-    if(check_sbit(sbit, &ctx->ihdr)) return 1;
-
-    if(!ctx->stored.ihdr) return 1;
-
-    ctx->sbit = *sbit;
-
-    ctx->stored.sbit = 1;
-    ctx->user.sbit = 1;
-
-    return 0;
-}
-
-int spng_set_srgb(spng_ctx *ctx, uint8_t rendering_intent)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(ctx);
-
-    if(rendering_intent > 3) return 1;
-
-    ctx->srgb_rendering_intent = rendering_intent;
-
-    ctx->stored.srgb = 1;
-    ctx->user.srgb = 1;
-
-    return 0;
-}
-
-int spng_set_text(spng_ctx *ctx, struct spng_text *text, uint32_t n_text)
-{
-    if(!n_text) return 1;
-    SPNG_SET_CHUNK_BOILERPLATE(text);
-
-    uint32_t i;
-    for(i=0; i < n_text; i++)
-    {
-        if(check_png_keyword(text[i].keyword)) return SPNG_ETEXT_KEYWORD;
-        if(!text[i].length) return 1;
-        if(text[i].length > UINT_MAX) return 1;
-        if(text[i].text == NULL) return 1;
-
-        if(text[i].type == SPNG_TEXT)
-        {
-            if(ctx->strict && check_png_text(text[i].text, text[i].length)) return 1;
-        }
-        else if(text[i].type == SPNG_ZTXT)
-        {
-            if(ctx->strict && check_png_text(text[i].text, text[i].length)) return 1;
-
-            if(text[i].compression_method != 0) return SPNG_EZTXT_COMPRESSION_METHOD;
-        }
-        else if(text[i].type == SPNG_ITXT)
-        {
-            if(text[i].compression_flag > 1) return SPNG_EITXT_COMPRESSION_FLAG;
-            if(text[i].compression_method != 0) return SPNG_EITXT_COMPRESSION_METHOD;
-            if(text[i].language_tag == NULL) return SPNG_EITXT_LANG_TAG;
-            if(text[i].translated_keyword == NULL) return SPNG_EITXT_TRANSLATED_KEY;
-        }
-        else return 1;
-
-    }
-
-    struct spng_text2 *text_list = spng__calloc(ctx, sizeof(struct spng_text2), n_text);
-
-    if(!text_list) return SPNG_EMEM;
-
-    if(ctx->text_list != NULL)
-    {
-        for(i=0; i < ctx->n_text; i++)
-        {
-            if(ctx->user.text) break;
-
-            spng__free(ctx, ctx->text_list[i].keyword);
-            if(ctx->text_list[i].compression_flag) spng__free(ctx, ctx->text_list[i].text);
-        }
-        spng__free(ctx, ctx->text_list);
-    }
-
-    for(i=0; i < n_text; i++)
-    {
-        text_list[i].type = text[i].type;
-        /* Prevent issues with spng_text.keyword[80] going out of scope */
-        text_list[i].keyword = text_list[i].user_keyword_storage;
-        memcpy(text_list[i].user_keyword_storage, text[i].keyword, strlen(text[i].keyword));
-        text_list[i].text = text[i].text;
-        text_list[i].text_length = text[i].length;
-
-        if(text[i].type == SPNG_ZTXT)
-        {
-            text_list[i].compression_flag = 1;
-        }
-        else if(text[i].type == SPNG_ITXT)
-        {
-            text_list[i].compression_flag = text[i].compression_flag;
-            text_list[i].language_tag = text[i].language_tag;
-            text_list[i].translated_keyword = text[i].translated_keyword;
-        }
-    }
-
-    ctx->text_list = text_list;
-    ctx->n_text = n_text;
-
-    ctx->stored.text = 1;
-    ctx->user.text = 1;
-
-    return 0;
-}
-
-int spng_set_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(bkgd);
-
-    if(!ctx->stored.ihdr)  return 1;
-
-    if(ctx->ihdr.color_type == 0 || ctx->ihdr.color_type == 4)
-    {
-        ctx->bkgd.gray = bkgd->gray;
-    }
-    else if(ctx->ihdr.color_type == 2 || ctx->ihdr.color_type == 6)
-    {
-        ctx->bkgd.red = bkgd->red;
-        ctx->bkgd.green = bkgd->green;
-        ctx->bkgd.blue = bkgd->blue;
-    }
-    else if(ctx->ihdr.color_type == 3)
-    {
-        if(!ctx->stored.plte) return SPNG_EBKGD_NO_PLTE;
-        if(bkgd->plte_index >= ctx->plte.n_entries) return SPNG_EBKGD_PLTE_IDX;
-
-        ctx->bkgd.plte_index = bkgd->plte_index;
-    }
-
-    ctx->stored.bkgd = 1;
-    ctx->user.bkgd = 1;
-
-    return 0;
-}
-
-int spng_set_hist(spng_ctx *ctx, struct spng_hist *hist)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(hist);
-
-    if(!ctx->stored.plte) return SPNG_EHIST_NO_PLTE;
-
-    ctx->hist = *hist;
-
-    ctx->stored.hist = 1;
-    ctx->user.hist = 1;
-
-    return 0;
-}
-
-int spng_set_phys(spng_ctx *ctx, struct spng_phys *phys)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(phys);
-
-    if(check_phys(phys)) return SPNG_EPHYS;
-
-    ctx->phys = *phys;
-
-    ctx->stored.phys = 1;
-    ctx->user.phys = 1;
-
-    return 0;
-}
-
-int spng_set_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t n_splt)
-{
-    if(!n_splt) return 1;
-    SPNG_SET_CHUNK_BOILERPLATE(splt);
-
-    uint32_t i;
-    for(i=0; i < n_splt; i++)
-    {
-        if(check_png_keyword(splt[i].name)) return SPNG_ESPLT_NAME;
-        if( !(splt[i].sample_depth == 8 || splt[i].sample_depth == 16) ) return SPNG_ESPLT_DEPTH;
-    }
-
-    if(ctx->stored.splt && !ctx->user.splt)
-    {
-        for(i=0; i < ctx->n_splt; i++)
-        {
-            if(ctx->splt_list[i].entries != NULL) spng__free(ctx, ctx->splt_list[i].entries);
-        }
-        spng__free(ctx, ctx->splt_list);
-    }
-
-    ctx->splt_list = splt;
-    ctx->n_splt = n_splt;
-
-    ctx->stored.splt = 1;
-    ctx->user.splt = 1;
-
-    return 0;
-}
-
-int spng_set_time(spng_ctx *ctx, struct spng_time *time)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(time);
-
-    if(check_time(time)) return SPNG_ETIME;
-
-    ctx->time = *time;
-
-    ctx->stored.time = 1;
-    ctx->user.time = 1;
-
-    return 0;
-}
-
-int spng_set_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t n_chunks)
-{
-    if(!n_chunks) return 1;
-    SPNG_SET_CHUNK_BOILERPLATE(chunks);
-
-    uint32_t i;
-    for(i=0; i < n_chunks; i++)
-    {
-        if(chunks[i].length > spng_u32max) return SPNG_ECHUNK_STDLEN;
-        if(chunks[i].length && chunks[i].data == NULL) return 1;
-
-        switch(chunks[i].location)
-        {
-            case SPNG_AFTER_IHDR:
-            case SPNG_AFTER_PLTE:
-            case SPNG_AFTER_IDAT:
-            break;
-            default: return SPNG_ECHUNK_POS;
-        }
-    }
-
-    if(ctx->stored.unknown && !ctx->user.unknown)
-    {
-        for(i=0; i < ctx->n_chunks; i++)
-        {
-            spng__free(ctx, ctx->chunk_list[i].data);
-        }
-        spng__free(ctx, ctx->chunk_list);
-    }
-
-    ctx->chunk_list = chunks;
-    ctx->n_chunks = n_chunks;
-
-    ctx->stored.unknown = 1;
-    ctx->user.unknown = 1;
-
-    return 0;
-}
-
-int spng_set_offs(spng_ctx *ctx, struct spng_offs *offs)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(offs);
-
-    if(check_offs(offs)) return SPNG_EOFFS;
-
-    ctx->offs = *offs;
-
-    ctx->stored.offs = 1;
-    ctx->user.offs = 1;
-
-    return 0;
-}
-
-int spng_set_exif(spng_ctx *ctx, struct spng_exif *exif)
-{
-    SPNG_SET_CHUNK_BOILERPLATE(exif);
-
-    if(check_exif(exif)) return SPNG_EEXIF;
-
-    if(ctx->exif.data != NULL && !ctx->user.exif) spng__free(ctx, ctx->exif.data);
-
-    ctx->exif = *exif;
-
-    ctx->stored.exif = 1;
-    ctx->user.exif = 1;
-
-    return 0;
-}
-
-const char *spng_strerror(int err)
-{
-    switch(err)
-    {
-        case SPNG_IO_EOF: return "end of stream";
-        case SPNG_IO_ERROR: return "stream error";
-        case SPNG_OK: return "success";
-        case SPNG_EINVAL: return "invalid argument";
-        case SPNG_EMEM: return "out of memory";
-        case SPNG_EOVERFLOW: return "arithmetic overflow";
-        case SPNG_ESIGNATURE: return "invalid signature";
-        case SPNG_EWIDTH: return "invalid image width";
-        case SPNG_EHEIGHT: return "invalid image height";
-        case SPNG_EUSER_WIDTH: return "image width exceeds user limit";
-        case SPNG_EUSER_HEIGHT: return "image height exceeds user limit";
-        case SPNG_EBIT_DEPTH: return "invalid bit depth";
-        case SPNG_ECOLOR_TYPE: return "invalid color type";
-        case SPNG_ECOMPRESSION_METHOD: return "invalid compression method";
-        case SPNG_EFILTER_METHOD: return "invalid filter method";
-        case SPNG_EINTERLACE_METHOD: return "invalid interlace method";
-        case SPNG_EIHDR_SIZE: return "invalid IHDR chunk size";
-        case SPNG_ENOIHDR: return "missing IHDR chunk";
-        case SPNG_ECHUNK_POS: return "invalid chunk position";
-        case SPNG_ECHUNK_SIZE: return "invalid chunk length";
-        case SPNG_ECHUNK_CRC: return "invalid chunk checksum";
-        case SPNG_ECHUNK_TYPE: return "invalid chunk type";
-        case SPNG_ECHUNK_UNKNOWN_CRITICAL: return "unknown critical chunk";
-        case SPNG_EDUP_PLTE: return "duplicate PLTE chunk";
-        case SPNG_EDUP_CHRM: return "duplicate cHRM chunk";
-        case SPNG_EDUP_GAMA: return "duplicate gAMA chunk";
-        case SPNG_EDUP_ICCP: return "duplicate iCCP chunk";
-        case SPNG_EDUP_SBIT: return "duplicate sBIT chunk";
-        case SPNG_EDUP_SRGB: return "duplicate sRGB chunk";
-        case SPNG_EDUP_BKGD: return "duplicate bKGD chunk";
-        case SPNG_EDUP_HIST: return "duplicate hIST chunk";
-        case SPNG_EDUP_TRNS: return "duplicate tRNS chunk";
-        case SPNG_EDUP_PHYS: return "duplicate pHYs chunk";
-        case SPNG_EDUP_TIME: return "duplicate tIME chunk";
-        case SPNG_EDUP_OFFS: return "duplicate oFFs chunk";
-        case SPNG_EDUP_EXIF: return "duplicate eXIf chunk";
-        case SPNG_ECHRM: return "invalid cHRM chunk";
-        case SPNG_EPLTE_IDX: return "invalid palette (PLTE) index";
-        case SPNG_ETRNS_COLOR_TYPE: return "tRNS chunk with incompatible color type";
-        case SPNG_ETRNS_NO_PLTE: return "missing palette (PLTE) for tRNS chunk";
-        case SPNG_EGAMA: return "invalid gAMA chunk";
-        case SPNG_EICCP_NAME: return "invalid iCCP profile name";
-        case SPNG_EICCP_COMPRESSION_METHOD: return "invalid iCCP compression method";
-        case SPNG_ESBIT: return "invalid sBIT chunk";
-        case SPNG_ESRGB: return "invalid sRGB chunk";
-        case SPNG_ETEXT: return "invalid tEXt chunk";
-        case SPNG_ETEXT_KEYWORD: return "invalid tEXt keyword";
-        case SPNG_EZTXT: return "invalid zTXt chunk";
-        case SPNG_EZTXT_COMPRESSION_METHOD: return "invalid zTXt compression method";
-        case SPNG_EITXT: return "invalid iTXt chunk";
-        case SPNG_EITXT_COMPRESSION_FLAG: return "invalid iTXt compression flag";
-        case SPNG_EITXT_COMPRESSION_METHOD: return "invalid iTXt compression method";
-        case SPNG_EITXT_LANG_TAG: return "invalid iTXt language tag";
-        case SPNG_EITXT_TRANSLATED_KEY: return "invalid iTXt translated key";
-        case SPNG_EBKGD_NO_PLTE: return "missing palette for bKGD chunk";
-        case SPNG_EBKGD_PLTE_IDX: return "invalid palette index for bKGD chunk";
-        case SPNG_EHIST_NO_PLTE: return "missing palette for hIST chunk";
-        case SPNG_EPHYS: return "invalid pHYs chunk";
-        case SPNG_ESPLT_NAME: return "invalid suggested palette name";
-        case SPNG_ESPLT_DUP_NAME: return "duplicate suggested palette (sPLT) name";
-        case SPNG_ESPLT_DEPTH: return "invalid suggested palette (sPLT) sample depth";
-        case SPNG_ETIME: return "invalid tIME chunk";
-        case SPNG_EOFFS: return "invalid oFFs chunk";
-        case SPNG_EEXIF: return "invalid eXIf chunk";
-        case SPNG_EIDAT_TOO_SHORT: return "IDAT stream too short";
-        case SPNG_EIDAT_STREAM: return "IDAT stream error";
-        case SPNG_EZLIB: return "zlib error";
-        case SPNG_EFILTER: return "invalid scanline filter";
-        case SPNG_EBUFSIZ: return "invalid buffer size";
-        case SPNG_EIO: return "i/o error";
-        case SPNG_EOF: return "end of file";
-        case SPNG_EBUF_SET: return "buffer already set";
-        case SPNG_EBADSTATE: return "non-recoverable state";
-        case SPNG_EFMT: return "invalid format";
-        case SPNG_EFLAGS: return "invalid flags";
-        case SPNG_ECHUNKAVAIL: return "chunk not available";
-        case SPNG_ENCODE_ONLY: return "encode only context";
-        case SPNG_EOI: return "reached end-of-image state";
-        case SPNG_ENOPLTE: return "missing PLTE for indexed image";
-        case SPNG_ECHUNK_LIMITS: return "reached chunk/cache limits";
-        case SPNG_EZLIB_INIT: return "zlib init error";
-        case SPNG_ECHUNK_STDLEN: return "chunk exceeds maximum standard length";
-        case SPNG_EINTERNAL: return "internal error";
-        case SPNG_ECTXTYPE: return "invalid operation for context type";
-        case SPNG_ENOSRC: return "source PNG not set";
-        case SPNG_ENODST: return "PNG output not set";
-        case SPNG_EOPSTATE: return "invalid operation for state";
-        case SPNG_ENOTFINAL: return "PNG not finalized";
-        default: return "unknown error";
-    }
-}
-
-const char *spng_version_string(void)
-{
-    return SPNG_VERSION_STRING;
-}
-
-#if defined(_MSC_VER)
-    #pragma warning(pop)
-#endif
-
-/* The following SIMD optimizations are derived from libpng source code. */
-
-/*
-* PNG Reference Library License version 2
-*
-* Copyright (c) 1995-2019 The PNG Reference Library Authors.
-* Copyright (c) 2018-2019 Cosmin Truta.
-* Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
-* Copyright (c) 1996-1997 Andreas Dilger.
-* Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
-*
-* The software is supplied "as is", without warranty of any kind,
-* express or implied, including, without limitation, the warranties
-* of merchantability, fitness for a particular purpose, title, and
-* non-infringement.  In no event shall the Copyright owners, or
-* anyone distributing the software, be liable for any damages or
-* other liability, whether in contract, tort or otherwise, arising
-* from, out of, or in connection with the software, or the use or
-* other dealings in the software, even if advised of the possibility
-* of such damage.
-*
-* Permission is hereby granted to use, copy, modify, and distribute
-* this software, or portions hereof, for any purpose, without fee,
-* subject to the following restrictions:
-*
-*  1. The origin of this software must not be misrepresented; you
-*     must not claim that you wrote the original software.  If you
-*     use this software in a product, an acknowledgment in the product
-*     documentation would be appreciated, but is not required.
-*
-*  2. Altered source versions must be plainly marked as such, and must
-*     not be misrepresented as being the original software.
-*
-*  3. This Copyright notice may not be removed or altered from any
-*     source or altered source distribution.
-*/
-
-#if defined(SPNG_X86)
-
-#ifndef SPNG_SSE
-    #define SPNG_SSE 1
-#endif
-
-#if defined(__GNUC__) && !defined(__clang__)
-    #if SPNG_SSE == 3
-        #pragma GCC target("ssse3")
-    #elif SPNG_SSE == 4
-        #pragma GCC target("sse4.1")
-    #else
-        #pragma GCC target("sse2")
-    #endif
-#endif
-
-/* SSE2 optimised filter functions
- * Derived from filter_neon_intrinsics.c
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 2016-2017 Glenn Randers-Pehrson
- * Written by Mike Klein and Matt Sarett
- * Derived from arm/filter_neon_intrinsics.c
- *
- * This code is derived from libpng source code.
- * For conditions of distribution and use, see the disclaimer
- * and license above.
- */
-
-#include <immintrin.h>
-#include <inttypes.h>
-#include <string.h>
-
-/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
- * They're positioned like this:
- *    prev:  c b
- *    row:   a d
- * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
- * whichever of a, b, or c is closest to p=a+b-c.
- */
-
-static __m128i load4(const void* p)
-{
-    int tmp;
-    memcpy(&tmp, p, sizeof(tmp));
-    return _mm_cvtsi32_si128(tmp);
-}
-
-static void store4(void* p, __m128i v)
-{
-    int tmp = _mm_cvtsi128_si32(v);
-    memcpy(p, &tmp, sizeof(int));
-}
-
-static __m128i load3(const void* p)
-{
-    uint32_t tmp = 0;
-    memcpy(&tmp, p, 3);
-    return _mm_cvtsi32_si128(tmp);
-}
-
-static void store3(void* p, __m128i v)
-{
-    int tmp = _mm_cvtsi128_si32(v);
-    memcpy(p, &tmp, 3);
-}
-
-static void defilter_sub3(size_t rowbytes, unsigned char *row)
-{
-    /* The Sub filter predicts each pixel as the previous pixel, a.
-     * There is no pixel to the left of the first pixel.  It's encoded directly.
-     * That works with our main loop if we just say that left pixel was zero.
-     */
-    size_t rb = rowbytes;
-
-    __m128i a, d = _mm_setzero_si128();
-
-    while(rb >= 4)
-    {
-        a = d; d = load4(row);
-        d = _mm_add_epi8(d, a);
-        store3(row, d);
-
-        row += 3;
-        rb  -= 3;
-    }
-
-    if(rb > 0)
-    {
-        a = d; d = load3(row);
-        d = _mm_add_epi8(d, a);
-        store3(row, d);
-    }
-}
-
-static void defilter_sub4(size_t rowbytes, unsigned char *row)
-{
-    /* The Sub filter predicts each pixel as the previous pixel, a.
-     * There is no pixel to the left of the first pixel.  It's encoded directly.
-     * That works with our main loop if we just say that left pixel was zero.
-     */
-    size_t rb = rowbytes+4;
-
-    __m128i a, d = _mm_setzero_si128();
-
-    while(rb > 4)
-    {
-        a = d; d = load4(row);
-        d = _mm_add_epi8(d, a);
-        store4(row, d);
-
-        row += 4;
-        rb  -= 4;
-    }
-}
-
-static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev)
-{
-    /* The Avg filter predicts each pixel as the (truncated) average of a and b.
-     * There's no pixel to the left of the first pixel.  Luckily, it's
-     * predicted to be half of the pixel above it.  So again, this works
-     * perfectly with our loop if we make sure a starts at zero.
-     */
-
-    size_t rb = rowbytes;
-
-    const __m128i zero = _mm_setzero_si128();
-
-    __m128i b;
-    __m128i a, d = zero;
-
-    while(rb >= 4)
-    {
-        __m128i avg;
-               b = load4(prev);
-        a = d; d = load4(row );
-
-        /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
-        avg = _mm_avg_epu8(a,b);
-        /* ...but we can fix it up by subtracting off 1 if it rounded up. */
-        avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b),
-                                            _mm_set1_epi8(1)));
-        d = _mm_add_epi8(d, avg);
-        store3(row, d);
-
-        prev += 3;
-        row  += 3;
-        rb   -= 3;
-    }
-
-    if(rb > 0)
-    {
-        __m128i avg;
-               b = load3(prev);
-        a = d; d = load3(row );
-
-        /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
-        avg = _mm_avg_epu8(a, b);
-        /* ...but we can fix it up by subtracting off 1 if it rounded up. */
-        avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b),
-                                            _mm_set1_epi8(1)));
-
-        d = _mm_add_epi8(d, avg);
-        store3(row, d);
-    }
-}
-
-static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev)
-{
-    /* The Avg filter predicts each pixel as the (truncated) average of a and b.
-     * There's no pixel to the left of the first pixel.  Luckily, it's
-     * predicted to be half of the pixel above it.  So again, this works
-     * perfectly with our loop if we make sure a starts at zero.
-     */
-    size_t rb = rowbytes+4;
-
-    const __m128i zero = _mm_setzero_si128();
-    __m128i    b;
-    __m128i a, d = zero;
-
-    while(rb > 4)
-    {
-        __m128i avg;
-               b = load4(prev);
-        a = d; d = load4(row );
-
-        /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
-        avg = _mm_avg_epu8(a,b);
-        /* ...but we can fix it up by subtracting off 1 if it rounded up. */
-        avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a, b),
-                                            _mm_set1_epi8(1)));
-
-        d = _mm_add_epi8(d, avg);
-        store4(row, d);
-
-        prev += 4;
-        row  += 4;
-        rb   -= 4;
-    }
-}
-
-/* Returns |x| for 16-bit lanes. */
-#if (SPNG_SSE >= 3) && !defined(_MSC_VER)
-__attribute__((target("ssse3")))
-#endif
-static __m128i abs_i16(__m128i x)
-{
-#if SPNG_SSE >= 3
-    return _mm_abs_epi16(x);
-#else
-    /* Read this all as, return x<0 ? -x : x.
-     * To negate two's complement, you flip all the bits then add 1.
-     */
-    __m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
-
-    /* Flip negative lanes. */
-    x = _mm_xor_si128(x, is_negative);
-
-    /* +1 to negative lanes, else +0. */
-    x = _mm_sub_epi16(x, is_negative);
-    return x;
-#endif
-}
-
-/* Bytewise c ? t : e. */
-static __m128i if_then_else(__m128i c, __m128i t, __m128i e)
-{
-#if SPNG_SSE >= 4
-    return _mm_blendv_epi8(e, t, c);
-#else
-    return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
-#endif
-}
-
-static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev)
-{
-    /* Paeth tries to predict pixel d using the pixel to the left of it, a,
-     * and two pixels from the previous row, b and c:
-     *   prev: c b
-     *   row:  a d
-     * The Paeth function predicts d to be whichever of a, b, or c is nearest to
-     * p=a+b-c.
-     *
-     * The first pixel has no left context, and so uses an Up filter, p = b.
-     * This works naturally with our main loop's p = a+b-c if we force a and c
-     * to zero.
-     * Here we zero b and d, which become c and a respectively at the start of
-     * the loop.
-     */
-    size_t rb = rowbytes;
-    const __m128i zero = _mm_setzero_si128();
-    __m128i c, b = zero,
-            a, d = zero;
-
-    while(rb >= 4)
-    {
-        /* It's easiest to do this math (particularly, deal with pc) with 16-bit
-         * intermediates.
-         */
-        __m128i pa,pb,pc,smallest,nearest;
-        c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
-        a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
-
-        /* (p-a) == (a+b-c - a) == (b-c) */
-
-        pa = _mm_sub_epi16(b, c);
-
-        /* (p-b) == (a+b-c - b) == (a-c) */
-        pb = _mm_sub_epi16(a, c);
-
-        /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
-        pc = _mm_add_epi16(pa, pb);
-
-        pa = abs_i16(pa);  /* |p-a| */
-        pb = abs_i16(pb);  /* |p-b| */
-        pc = abs_i16(pc);  /* |p-c| */
-
-        smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
-
-        /* Paeth breaks ties favoring a over b over c. */
-        nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
-                            if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c));
-
-        /* Note `_epi8`: we need addition to wrap modulo 255. */
-        d = _mm_add_epi8(d, nearest);
-        store3(row, _mm_packus_epi16(d, d));
-
-        prev += 3;
-        row  += 3;
-        rb   -= 3;
-    }
-
-    if(rb > 0)
-    {
-        /* It's easiest to do this math (particularly, deal with pc) with 16-bit
-         * intermediates.
-         */
-        __m128i pa, pb, pc, smallest, nearest;
-        c = b; b = _mm_unpacklo_epi8(load3(prev), zero);
-        a = d; d = _mm_unpacklo_epi8(load3(row ), zero);
-
-        /* (p-a) == (a+b-c - a) == (b-c) */
-        pa = _mm_sub_epi16(b, c);
-
-        /* (p-b) == (a+b-c - b) == (a-c) */
-        pb = _mm_sub_epi16(a, c);
-
-        /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
-        pc = _mm_add_epi16(pa, pb);
-
-        pa = abs_i16(pa);  /* |p-a| */
-        pb = abs_i16(pb);  /* |p-b| */
-        pc = abs_i16(pc);  /* |p-c| */
-
-        smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
-
-        /* Paeth breaks ties favoring a over b over c. */
-        nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
-                            if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c));
-
-        /* Note `_epi8`: we need addition to wrap modulo 255. */
-        d = _mm_add_epi8(d, nearest);
-        store3(row, _mm_packus_epi16(d, d));
-    }
-}
-
-static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev)
-{
-    /* Paeth tries to predict pixel d using the pixel to the left of it, a,
-     * and two pixels from the previous row, b and c:
-     *   prev: c b
-     *   row:  a d
-     * The Paeth function predicts d to be whichever of a, b, or c is nearest to
-     * p=a+b-c.
-     *
-     * The first pixel has no left context, and so uses an Up filter, p = b.
-     * This works naturally with our main loop's p = a+b-c if we force a and c
-     * to zero.
-     * Here we zero b and d, which become c and a respectively at the start of
-     * the loop.
-     */
-    size_t rb = rowbytes+4;
-
-    const __m128i zero = _mm_setzero_si128();
-    __m128i pa, pb, pc, smallest, nearest;
-    __m128i c, b = zero,
-            a, d = zero;
-
-    while(rb > 4)
-    {
-        /* It's easiest to do this math (particularly, deal with pc) with 16-bit
-         * intermediates.
-         */
-        c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
-        a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
-
-        /* (p-a) == (a+b-c - a) == (b-c) */
-        pa = _mm_sub_epi16(b, c);
-
-        /* (p-b) == (a+b-c - b) == (a-c) */
-        pb = _mm_sub_epi16(a, c);
-
-        /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
-        pc = _mm_add_epi16(pa, pb);
-
-        pa = abs_i16(pa);  /* |p-a| */
-        pb = abs_i16(pb);  /* |p-b| */
-        pc = abs_i16(pc);  /* |p-c| */
-
-        smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
-
-        /* Paeth breaks ties favoring a over b over c. */
-        nearest  = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
-                            if_then_else(_mm_cmpeq_epi16(smallest, pb), b, c));
-
-        /* Note `_epi8`: we need addition to wrap modulo 255. */
-        d = _mm_add_epi8(d, nearest);
-        store4(row, _mm_packus_epi16(d, d));
-
-        prev += 4;
-        row  += 4;
-        rb   -= 4;
-    }
-}
-
-#endif /* SPNG_X86 */
-
-
-#if defined(SPNG_ARM)
-
-/* NEON optimised filter functions
- * Derived from filter_neon_intrinsics.c
- *
- * Copyright (c) 2018 Cosmin Truta
- * Copyright (c) 2014,2016 Glenn Randers-Pehrson
- * Written by James Yu <james.yu at linaro.org>, October 2013.
- * Based on filter_neon.S, written by Mans Rullgard, 2011.
- *
- * This code is derived from libpng source code.
- * For conditions of distribution and use, see the disclaimer
- * and license in this file.
- */
-
-#define png_aligncast(type, value) ((void*)(value))
-#define png_aligncastconst(type, value) ((const void*)(value))
-
-/* libpng row pointers are not necessarily aligned to any particular boundary,
- * however this code will only work with appropriate alignment. mips/mips_init.c
- * checks for this (and will not compile unless it is done). This code uses
- * variants of png_aligncast to avoid compiler warnings.
- */
-#define png_ptr(type,pointer) png_aligncast(type *,pointer)
-#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
-
-/* The following relies on a variable 'temp_pointer' being declared with type
- * 'type'.  This is written this way just to hide the GCC strict aliasing
- * warning; note that the code is safe because there never is an alias between
- * the input and output pointers.
- */
-#define png_ldr(type,pointer)\
-   (temp_pointer = png_ptr(type,pointer), *temp_pointer)
-
-
-#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_ARM64)
-    #include <arm64_neon.h>
-#else
-    #include <arm_neon.h>
-#endif
-
-static void defilter_sub3(size_t rowbytes, unsigned char *row)
-{
-    unsigned char *rp = row;
-    unsigned char *rp_stop = row + rowbytes;
-
-    uint8x16_t vtmp = vld1q_u8(rp);
-    uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
-    uint8x8x2_t vrp = *vrpt;
-
-    uint8x8x4_t vdest;
-    vdest.val[3] = vdup_n_u8(0);
-
-    for (; rp < rp_stop;)
-    {
-        uint8x8_t vtmp1, vtmp2;
-        uint32x2_t *temp_pointer;
-
-        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
-        vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
-        vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6);
-        vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
-
-        vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
-        vdest.val[2] = vadd_u8(vdest.val[1], vtmp2);
-        vdest.val[3] = vadd_u8(vdest.val[2], vtmp1);
-
-        vtmp = vld1q_u8(rp + 12);
-        vrpt = png_ptr(uint8x8x2_t, &vtmp);
-        vrp = *vrpt;
-
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
-        rp += 3;
-    }
-}
-
-static void defilter_sub4(size_t rowbytes, unsigned char *row)
-{
-    unsigned char *rp = row;
-    unsigned char *rp_stop = row + rowbytes;
-
-    uint8x8x4_t vdest;
-    vdest.val[3] = vdup_n_u8(0);
-
-    for (; rp < rp_stop; rp += 16)
-    {
-        uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
-        uint8x8x4_t *vrpt = png_ptr(uint8x8x4_t,&vtmp);
-        uint8x8x4_t vrp = *vrpt;
-        uint32x2x4_t *temp_pointer;
-        uint32x2x4_t vdest_val;
-
-        vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
-        vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]);
-        vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]);
-        vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]);
-
-        vdest_val = png_ldr(uint32x2x4_t, &vdest);
-        vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
-    }
-}
-
-static void defilter_avg3(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
-{
-    unsigned char *rp = row;
-    const unsigned char *pp = prev_row;
-    unsigned char *rp_stop = row + rowbytes;
-
-    uint8x16_t vtmp;
-    uint8x8x2_t *vrpt;
-    uint8x8x2_t vrp;
-    uint8x8x4_t vdest;
-    vdest.val[3] = vdup_n_u8(0);
-
-    vtmp = vld1q_u8(rp);
-    vrpt = png_ptr(uint8x8x2_t,&vtmp);
-    vrp = *vrpt;
-
-    for (; rp < rp_stop; pp += 12)
-    {
-        uint8x8_t vtmp1, vtmp2, vtmp3;
-
-        uint8x8x2_t *vppt;
-        uint8x8x2_t vpp;
-
-        uint32x2_t *temp_pointer;
-
-        vtmp = vld1q_u8(pp);
-        vppt = png_ptr(uint8x8x2_t,&vtmp);
-        vpp = *vppt;
-
-        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
-        vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
-        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-
-        vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
-        vtmp3 = vext_u8(vrp.val[0], vrp.val[1], 6);
-        vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
-        vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
-
-        vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 6);
-        vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
-
-        vtmp = vld1q_u8(rp + 12);
-        vrpt = png_ptr(uint8x8x2_t,&vtmp);
-        vrp = *vrpt;
-
-        vdest.val[2] = vhadd_u8(vdest.val[1], vtmp2);
-        vdest.val[2] = vadd_u8(vdest.val[2], vtmp3);
-
-        vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
-
-        vdest.val[3] = vhadd_u8(vdest.val[2], vtmp2);
-        vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
-
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
-        rp += 3;
-    }
-}
-
-static void defilter_avg4(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
-{
-    unsigned char *rp = row;
-    unsigned char *rp_stop = row + rowbytes;
-    const unsigned char *pp = prev_row;
-
-    uint8x8x4_t vdest;
-    vdest.val[3] = vdup_n_u8(0);
-
-    for (; rp < rp_stop; rp += 16, pp += 16)
-    {
-        uint32x2x4_t vtmp;
-        uint8x8x4_t *vrpt, *vppt;
-        uint8x8x4_t vrp, vpp;
-        uint32x2x4_t *temp_pointer;
-        uint32x2x4_t vdest_val;
-
-        vtmp = vld4_u32(png_ptr(uint32_t,rp));
-        vrpt = png_ptr(uint8x8x4_t,&vtmp);
-        vrp = *vrpt;
-        vtmp = vld4_u32(png_ptrc(uint32_t,pp));
-        vppt = png_ptr(uint8x8x4_t,&vtmp);
-        vpp = *vppt;
-
-        vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
-        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-        vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]);
-        vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
-        vdest.val[2] = vhadd_u8(vdest.val[1], vpp.val[2]);
-        vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
-        vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]);
-        vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
-
-        vdest_val = png_ldr(uint32x2x4_t, &vdest);
-        vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
-    }
-}
-
-static uint8x8_t paeth_arm(uint8x8_t a, uint8x8_t b, uint8x8_t c)
-{
-    uint8x8_t d, e;
-    uint16x8_t p1, pa, pb, pc;
-
-    p1 = vaddl_u8(a, b); /* a + b */
-    pc = vaddl_u8(c, c); /* c * 2 */
-    pa = vabdl_u8(b, c); /* pa */
-    pb = vabdl_u8(a, c); /* pb */
-    pc = vabdq_u16(p1, pc); /* pc */
-
-    p1 = vcleq_u16(pa, pb); /* pa <= pb */
-    pa = vcleq_u16(pa, pc); /* pa <= pc */
-    pb = vcleq_u16(pb, pc); /* pb <= pc */
-
-    p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */
-
-    d = vmovn_u16(pb);
-    e = vmovn_u16(p1);
-
-    d = vbsl_u8(d, b, c);
-    e = vbsl_u8(e, a, d);
-
-    return e;
-}
-
-static void defilter_paeth3(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
-{
-    unsigned char *rp = row;
-    const unsigned char *pp = prev_row;
-    unsigned char *rp_stop = row + rowbytes;
-
-    uint8x16_t vtmp;
-    uint8x8x2_t *vrpt;
-    uint8x8x2_t vrp;
-    uint8x8_t vlast = vdup_n_u8(0);
-    uint8x8x4_t vdest;
-    vdest.val[3] = vdup_n_u8(0);
-
-    vtmp = vld1q_u8(rp);
-    vrpt = png_ptr(uint8x8x2_t,&vtmp);
-    vrp = *vrpt;
-
-    for (; rp < rp_stop; pp += 12)
-    {
-        uint8x8x2_t *vppt;
-        uint8x8x2_t vpp;
-        uint8x8_t vtmp1, vtmp2, vtmp3;
-        uint32x2_t *temp_pointer;
-
-        vtmp = vld1q_u8(pp);
-        vppt = png_ptr(uint8x8x2_t,&vtmp);
-        vpp = *vppt;
-
-        vdest.val[0] = paeth_arm(vdest.val[3], vpp.val[0], vlast);
-        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-
-        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
-        vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
-        vdest.val[1] = paeth_arm(vdest.val[0], vtmp2, vpp.val[0]);
-        vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
-
-        vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 6);
-        vtmp3 = vext_u8(vpp.val[0], vpp.val[1], 6);
-        vdest.val[2] = paeth_arm(vdest.val[1], vtmp3, vtmp2);
-        vdest.val[2] = vadd_u8(vdest.val[2], vtmp1);
-
-        vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
-        vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
-
-        vtmp = vld1q_u8(rp + 12);
-        vrpt = png_ptr(uint8x8x2_t,&vtmp);
-        vrp = *vrpt;
-
-        vdest.val[3] = paeth_arm(vdest.val[2], vtmp2, vtmp3);
-        vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
-
-        vlast = vtmp2;
-
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
-        rp += 3;
-        vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
-        rp += 3;
-    }
-}
-
-static void defilter_paeth4(size_t rowbytes, unsigned char *row, const unsigned char *prev_row)
-{
-    unsigned char *rp = row;
-    unsigned char *rp_stop = row + rowbytes;
-    const unsigned char *pp = prev_row;
-
-    uint8x8_t vlast = vdup_n_u8(0);
-    uint8x8x4_t vdest;
-    vdest.val[3] = vdup_n_u8(0);
-
-    for (; rp < rp_stop; rp += 16, pp += 16)
-    {
-        uint32x2x4_t vtmp;
-        uint8x8x4_t *vrpt, *vppt;
-        uint8x8x4_t vrp, vpp;
-        uint32x2x4_t *temp_pointer;
-        uint32x2x4_t vdest_val;
-
-        vtmp = vld4_u32(png_ptr(uint32_t,rp));
-        vrpt = png_ptr(uint8x8x4_t,&vtmp);
-        vrp = *vrpt;
-        vtmp = vld4_u32(png_ptrc(uint32_t,pp));
-        vppt = png_ptr(uint8x8x4_t,&vtmp);
-        vpp = *vppt;
-
-        vdest.val[0] = paeth_arm(vdest.val[3], vpp.val[0], vlast);
-        vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
-        vdest.val[1] = paeth_arm(vdest.val[0], vpp.val[1], vpp.val[0]);
-        vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
-        vdest.val[2] = paeth_arm(vdest.val[1], vpp.val[2], vpp.val[1]);
-        vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
-        vdest.val[3] = paeth_arm(vdest.val[2], vpp.val[3], vpp.val[2]);
-        vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
-
-        vlast = vpp.val[3];
-
-        vdest_val = png_ldr(uint32x2x4_t, &vdest);
-        vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0);
-    }
-}
-
-/* NEON optimised palette expansion functions
- * Derived from palette_neon_intrinsics.c
- *
- * Copyright (c) 2018-2019 Cosmin Truta
- * Copyright (c) 2017-2018 Arm Holdings. All rights reserved.
- * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017.
- *
- * This code is derived from libpng source code.
- * For conditions of distribution and use, see the disclaimer
- * and license in this file.
- *
- * Related: https://developer.arm.com/documentation/101964/latest/Color-palette-expansion
- *
- * The functions were refactored to iterate forward.
- *
- */
-
-/* Expands a palettized row into RGBA8. */
-static uint32_t expand_palette_rgba8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width)
-{
-    const uint32_t scanline_stride = 4;
-    const uint32_t row_stride = scanline_stride * 4;
-    const uint32_t count = width / scanline_stride;
-    const uint32_t *palette = (const uint32_t*)plte;
-
-    if(!count) return 0;
-
-    uint32_t i;
-    uint32x4_t cur;
-    for(i=0; i < count; i++, scanline += scanline_stride)
-    {
-        cur = vld1q_dup_u32 (palette + scanline[0]);
-        cur = vld1q_lane_u32(palette + scanline[1], cur, 1);
-        cur = vld1q_lane_u32(palette + scanline[2], cur, 2);
-        cur = vld1q_lane_u32(palette + scanline[3], cur, 3);
-        vst1q_u32((uint32_t*)(row + i * row_stride), cur);
-    }
-
-    return count * scanline_stride;
-}
-
-/* Expands a palettized row into RGB8. */
-static uint32_t expand_palette_rgb8_neon(unsigned char *row, const unsigned char *scanline, const unsigned char *plte, uint32_t width)
-{
-    const uint32_t scanline_stride = 8;
-    const uint32_t row_stride = scanline_stride * 3;
-    const uint32_t count = width / scanline_stride;
-
-    if(!count) return 0;
-
-    uint32_t i;
-    uint8x8x3_t cur;
-    for(i=0; i < count; i++, scanline += scanline_stride)
-    {
-        cur = vld3_dup_u8 (plte + 3 * scanline[0]);
-        cur = vld3_lane_u8(plte + 3 * scanline[1], cur, 1);
-        cur = vld3_lane_u8(plte + 3 * scanline[2], cur, 2);
-        cur = vld3_lane_u8(plte + 3 * scanline[3], cur, 3);
-        cur = vld3_lane_u8(plte + 3 * scanline[4], cur, 4);
-        cur = vld3_lane_u8(plte + 3 * scanline[5], cur, 5);
-        cur = vld3_lane_u8(plte + 3 * scanline[6], cur, 6);
-        cur = vld3_lane_u8(plte + 3 * scanline[7], cur, 7);
-        vst3_u8(row + i * row_stride, cur);
-    }
-
-    return count * scanline_stride;
-}
-
-#endif /* SPNG_ARM */
diff --git a/cmake/externals/libspng/spng.h b/cmake/externals/libspng/spng.h
deleted file mode 100644
index 8f946337b..000000000
--- a/cmake/externals/libspng/spng.h
+++ /dev/null
@@ -1,537 +0,0 @@
-/* SPDX-License-Identifier: BSD-2-Clause */
-#ifndef SPNG_H
-#define SPNG_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(SPNG_STATIC)
-    #if defined(SPNG__BUILD)
-        #define SPNG_API __declspec(dllexport)
-    #else
-        #define SPNG_API __declspec(dllimport)
-    #endif
-#else
-    #define SPNG_API
-#endif
-
-#if defined(_MSC_VER)
-    #define SPNG_CDECL __cdecl
-#else
-    #define SPNG_CDECL
-#endif
-
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#define SPNG_VERSION_MAJOR 0
-#define SPNG_VERSION_MINOR 7
-#define SPNG_VERSION_PATCH 4
-
-enum spng_errno
-{
-    SPNG_IO_ERROR = -2,
-    SPNG_IO_EOF = -1,
-    SPNG_OK = 0,
-    SPNG_EINVAL,
-    SPNG_EMEM,
-    SPNG_EOVERFLOW,
-    SPNG_ESIGNATURE,
-    SPNG_EWIDTH,
-    SPNG_EHEIGHT,
-    SPNG_EUSER_WIDTH,
-    SPNG_EUSER_HEIGHT,
-    SPNG_EBIT_DEPTH,
-    SPNG_ECOLOR_TYPE,
-    SPNG_ECOMPRESSION_METHOD,
-    SPNG_EFILTER_METHOD,
-    SPNG_EINTERLACE_METHOD,
-    SPNG_EIHDR_SIZE,
-    SPNG_ENOIHDR,
-    SPNG_ECHUNK_POS,
-    SPNG_ECHUNK_SIZE,
-    SPNG_ECHUNK_CRC,
-    SPNG_ECHUNK_TYPE,
-    SPNG_ECHUNK_UNKNOWN_CRITICAL,
-    SPNG_EDUP_PLTE,
-    SPNG_EDUP_CHRM,
-    SPNG_EDUP_GAMA,
-    SPNG_EDUP_ICCP,
-    SPNG_EDUP_SBIT,
-    SPNG_EDUP_SRGB,
-    SPNG_EDUP_BKGD,
-    SPNG_EDUP_HIST,
-    SPNG_EDUP_TRNS,
-    SPNG_EDUP_PHYS,
-    SPNG_EDUP_TIME,
-    SPNG_EDUP_OFFS,
-    SPNG_EDUP_EXIF,
-    SPNG_ECHRM,
-    SPNG_EPLTE_IDX,
-    SPNG_ETRNS_COLOR_TYPE,
-    SPNG_ETRNS_NO_PLTE,
-    SPNG_EGAMA,
-    SPNG_EICCP_NAME,
-    SPNG_EICCP_COMPRESSION_METHOD,
-    SPNG_ESBIT,
-    SPNG_ESRGB,
-    SPNG_ETEXT,
-    SPNG_ETEXT_KEYWORD,
-    SPNG_EZTXT,
-    SPNG_EZTXT_COMPRESSION_METHOD,
-    SPNG_EITXT,
-    SPNG_EITXT_COMPRESSION_FLAG,
-    SPNG_EITXT_COMPRESSION_METHOD,
-    SPNG_EITXT_LANG_TAG,
-    SPNG_EITXT_TRANSLATED_KEY,
-    SPNG_EBKGD_NO_PLTE,
-    SPNG_EBKGD_PLTE_IDX,
-    SPNG_EHIST_NO_PLTE,
-    SPNG_EPHYS,
-    SPNG_ESPLT_NAME,
-    SPNG_ESPLT_DUP_NAME,
-    SPNG_ESPLT_DEPTH,
-    SPNG_ETIME,
-    SPNG_EOFFS,
-    SPNG_EEXIF,
-    SPNG_EIDAT_TOO_SHORT,
-    SPNG_EIDAT_STREAM,
-    SPNG_EZLIB,
-    SPNG_EFILTER,
-    SPNG_EBUFSIZ,
-    SPNG_EIO,
-    SPNG_EOF,
-    SPNG_EBUF_SET,
-    SPNG_EBADSTATE,
-    SPNG_EFMT,
-    SPNG_EFLAGS,
-    SPNG_ECHUNKAVAIL,
-    SPNG_ENCODE_ONLY,
-    SPNG_EOI,
-    SPNG_ENOPLTE,
-    SPNG_ECHUNK_LIMITS,
-    SPNG_EZLIB_INIT,
-    SPNG_ECHUNK_STDLEN,
-    SPNG_EINTERNAL,
-    SPNG_ECTXTYPE,
-    SPNG_ENOSRC,
-    SPNG_ENODST,
-    SPNG_EOPSTATE,
-    SPNG_ENOTFINAL,
-};
-
-enum spng_text_type
-{
-    SPNG_TEXT = 1,
-    SPNG_ZTXT = 2,
-    SPNG_ITXT = 3
-};
-
-enum spng_color_type
-{
-    SPNG_COLOR_TYPE_GRAYSCALE = 0,
-    SPNG_COLOR_TYPE_TRUECOLOR = 2,
-    SPNG_COLOR_TYPE_INDEXED = 3,
-    SPNG_COLOR_TYPE_GRAYSCALE_ALPHA = 4,
-    SPNG_COLOR_TYPE_TRUECOLOR_ALPHA = 6
-};
-
-enum spng_filter
-{
-    SPNG_FILTER_NONE = 0,
-    SPNG_FILTER_SUB = 1,
-    SPNG_FILTER_UP = 2,
-    SPNG_FILTER_AVERAGE = 3,
-    SPNG_FILTER_PAETH = 4
-};
-
-enum spng_filter_choice
-{
-    SPNG_DISABLE_FILTERING = 0,
-    SPNG_FILTER_CHOICE_NONE = 8,
-    SPNG_FILTER_CHOICE_SUB = 16,
-    SPNG_FILTER_CHOICE_UP = 32,
-    SPNG_FILTER_CHOICE_AVG = 64,
-    SPNG_FILTER_CHOICE_PAETH = 128,
-    SPNG_FILTER_CHOICE_ALL = (8|16|32|64|128)
-};
-
-enum spng_interlace_method
-{
-    SPNG_INTERLACE_NONE = 0,
-    SPNG_INTERLACE_ADAM7 = 1
-};
-
-/* Channels are always in byte-order */
-enum spng_format
-{
-    SPNG_FMT_RGBA8 = 1,
-    SPNG_FMT_RGBA16 = 2,
-    SPNG_FMT_RGB8 = 4,
-
-    /* Partially implemented, see documentation */
-    SPNG_FMT_GA8 = 16,
-    SPNG_FMT_GA16 = 32,
-    SPNG_FMT_G8 = 64,
-
-    /* No conversion or scaling */
-    SPNG_FMT_PNG = 256,
-    SPNG_FMT_RAW = 512  /* big-endian (everything else is host-endian) */
-};
-
-enum spng_ctx_flags
-{
-    SPNG_CTX_IGNORE_ADLER32 = 1, /* Ignore checksum in DEFLATE streams */
-    SPNG_CTX_ENCODER = 2 /* Create an encoder context */
-};
-
-enum spng_decode_flags
-{
-    SPNG_DECODE_USE_TRNS = 1, /* Deprecated */
-    SPNG_DECODE_USE_GAMA = 2, /* Deprecated */
-    SPNG_DECODE_USE_SBIT = 8, /* Undocumented */
-
-    SPNG_DECODE_TRNS = 1, /* Apply transparency */
-    SPNG_DECODE_GAMMA = 2, /* Apply gamma correction */
-    SPNG_DECODE_PROGRESSIVE = 256 /* Initialize for progressive reads */
-};
-
-enum spng_crc_action
-{
-    /* Default for critical chunks */
-    SPNG_CRC_ERROR = 0,
-
-    /* Discard chunk, invalid for critical chunks.
-       Since v0.6.2: default for ancillary chunks */
-    SPNG_CRC_DISCARD = 1,
-
-    /* Ignore and don't calculate checksum.
-       Since v0.6.2: also ignores checksums in DEFLATE streams */
-    SPNG_CRC_USE = 2
-};
-
-enum spng_encode_flags
-{
-    SPNG_ENCODE_PROGRESSIVE = 1, /* Initialize for progressive writes */
-    SPNG_ENCODE_FINALIZE = 2, /* Finalize PNG after encoding image */
-};
-
-struct spng_ihdr
-{
-    uint32_t width;
-    uint32_t height;
-    uint8_t bit_depth;
-    uint8_t color_type;
-    uint8_t compression_method;
-    uint8_t filter_method;
-    uint8_t interlace_method;
-};
-
-struct spng_plte_entry
-{
-    uint8_t red;
-    uint8_t green;
-    uint8_t blue;
-
-    uint8_t alpha; /* Reserved for internal use */
-};
-
-struct spng_plte
-{
-    uint32_t n_entries;
-    struct spng_plte_entry entries[256];
-};
-
-struct spng_trns
-{
-    uint16_t gray;
-
-    uint16_t red;
-    uint16_t green;
-    uint16_t blue;
-
-    uint32_t n_type3_entries;
-    uint8_t type3_alpha[256];
-};
-
-struct spng_chrm_int
-{
-    uint32_t white_point_x;
-    uint32_t white_point_y;
-    uint32_t red_x;
-    uint32_t red_y;
-    uint32_t green_x;
-    uint32_t green_y;
-    uint32_t blue_x;
-    uint32_t blue_y;
-};
-
-struct spng_chrm
-{
-    double white_point_x;
-    double white_point_y;
-    double red_x;
-    double red_y;
-    double green_x;
-    double green_y;
-    double blue_x;
-    double blue_y;
-};
-
-struct spng_iccp
-{
-    char profile_name[80];
-    size_t profile_len;
-    char *profile;
-};
-
-struct spng_sbit
-{
-    uint8_t grayscale_bits;
-    uint8_t red_bits;
-    uint8_t green_bits;
-    uint8_t blue_bits;
-    uint8_t alpha_bits;
-};
-
-struct spng_text
-{
-    char keyword[80];
-    int type;
-
-    size_t length;
-    char *text;
-
-    uint8_t compression_flag; /* iTXt only */
-    uint8_t compression_method; /* iTXt, ztXt only */
-    char *language_tag; /* iTXt only */
-    char *translated_keyword; /* iTXt only */
-};
-
-struct spng_bkgd
-{
-    uint16_t gray; /* Only for gray/gray alpha */
-    uint16_t red;
-    uint16_t green;
-    uint16_t blue;
-    uint16_t plte_index; /* Only for indexed color */
-};
-
-struct spng_hist
-{
-    uint16_t frequency[256];
-};
-
-struct spng_phys
-{
-    uint32_t ppu_x, ppu_y;
-    uint8_t unit_specifier;
-};
-
-struct spng_splt_entry
-{
-    uint16_t red;
-    uint16_t green;
-    uint16_t blue;
-    uint16_t alpha;
-    uint16_t frequency;
-};
-
-struct spng_splt
-{
-    char name[80];
-    uint8_t sample_depth;
-    uint32_t n_entries;
-    struct spng_splt_entry *entries;
-};
-
-struct spng_time
-{
-    uint16_t year;
-    uint8_t month;
-    uint8_t day;
-    uint8_t hour;
-    uint8_t minute;
-    uint8_t second;
-};
-
-struct spng_offs
-{
-    int32_t x, y;
-    uint8_t unit_specifier;
-};
-
-struct spng_exif
-{
-    size_t length;
-    char *data;
-};
-
-struct spng_chunk
-{
-    size_t offset;
-    uint32_t length;
-    uint8_t type[4];
-    uint32_t crc;
-};
-
-enum spng_location
-{
-    SPNG_AFTER_IHDR = 1,
-    SPNG_AFTER_PLTE = 2,
-    SPNG_AFTER_IDAT = 8,
-};
-
-struct spng_unknown_chunk
-{
-    uint8_t type[4];
-    size_t length;
-    void *data;
-    enum spng_location location;
-};
-
-enum spng_option
-{
-    SPNG_KEEP_UNKNOWN_CHUNKS = 1,
-
-    SPNG_IMG_COMPRESSION_LEVEL,
-    SPNG_IMG_WINDOW_BITS,
-    SPNG_IMG_MEM_LEVEL,
-    SPNG_IMG_COMPRESSION_STRATEGY,
-
-    SPNG_TEXT_COMPRESSION_LEVEL,
-    SPNG_TEXT_WINDOW_BITS,
-    SPNG_TEXT_MEM_LEVEL,
-    SPNG_TEXT_COMPRESSION_STRATEGY,
-
-    SPNG_FILTER_CHOICE,
-    SPNG_CHUNK_COUNT_LIMIT,
-    SPNG_ENCODE_TO_BUFFER,
-};
-
-typedef void* SPNG_CDECL spng_malloc_fn(size_t size);
-typedef void* SPNG_CDECL spng_realloc_fn(void* ptr, size_t size);
-typedef void* SPNG_CDECL spng_calloc_fn(size_t count, size_t size);
-typedef void SPNG_CDECL spng_free_fn(void* ptr);
-
-struct spng_alloc
-{
-    spng_malloc_fn *malloc_fn;
-    spng_realloc_fn *realloc_fn;
-    spng_calloc_fn *calloc_fn;
-    spng_free_fn *free_fn;
-};
-
-struct spng_row_info
-{
-    uint32_t scanline_idx;
-    uint32_t row_num; /* deinterlaced row index */
-    int pass;
-    uint8_t filter;
-};
-
-typedef struct spng_ctx spng_ctx;
-
-typedef int spng_read_fn(spng_ctx *ctx, void *user, void *dest, size_t length);
-typedef int spng_write_fn(spng_ctx *ctx, void *user, void *src, size_t length);
-
-typedef int spng_rw_fn(spng_ctx *ctx, void *user, void *dst_src, size_t length);
-
-SPNG_API spng_ctx *spng_ctx_new(int flags);
-SPNG_API spng_ctx *spng_ctx_new2(struct spng_alloc *alloc, int flags);
-SPNG_API void spng_ctx_free(spng_ctx *ctx);
-
-SPNG_API int spng_set_png_buffer(spng_ctx *ctx, const void *buf, size_t size);
-SPNG_API int spng_set_png_stream(spng_ctx *ctx, spng_rw_fn *rw_func, void *user);
-SPNG_API int spng_set_png_file(spng_ctx *ctx, FILE *file);
-
-SPNG_API void *spng_get_png_buffer(spng_ctx *ctx, size_t *len, int *error);
-
-SPNG_API int spng_set_image_limits(spng_ctx *ctx, uint32_t width, uint32_t height);
-SPNG_API int spng_get_image_limits(spng_ctx *ctx, uint32_t *width, uint32_t *height);
-
-SPNG_API int spng_set_chunk_limits(spng_ctx *ctx, size_t chunk_size, size_t cache_size);
-SPNG_API int spng_get_chunk_limits(spng_ctx *ctx, size_t *chunk_size, size_t *cache_size);
-
-SPNG_API int spng_set_crc_action(spng_ctx *ctx, int critical, int ancillary);
-
-SPNG_API int spng_set_option(spng_ctx *ctx, enum spng_option option, int value);
-SPNG_API int spng_get_option(spng_ctx *ctx, enum spng_option option, int *value);
-
-SPNG_API int spng_decoded_image_size(spng_ctx *ctx, int fmt, size_t *len);
-
-/* Decode */
-SPNG_API int spng_decode_image(spng_ctx *ctx, void *out, size_t len, int fmt, int flags);
-
-/* Progressive decode */
-SPNG_API int spng_decode_scanline(spng_ctx *ctx, void *out, size_t len);
-SPNG_API int spng_decode_row(spng_ctx *ctx, void *out, size_t len);
-SPNG_API int spng_decode_chunks(spng_ctx *ctx);
-
-/* Encode/decode */
-SPNG_API int spng_get_row_info(spng_ctx *ctx, struct spng_row_info *row_info);
-
-/* Encode */
-SPNG_API int spng_encode_image(spng_ctx *ctx, const void *img, size_t len, int fmt, int flags);
-
-/* Progressive encode */
-SPNG_API int spng_encode_scanline(spng_ctx *ctx, const void *scanline, size_t len);
-SPNG_API int spng_encode_row(spng_ctx *ctx, const void *row, size_t len);
-SPNG_API int spng_encode_chunks(spng_ctx *ctx);
-
-SPNG_API int spng_get_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr);
-SPNG_API int spng_get_plte(spng_ctx *ctx, struct spng_plte *plte);
-SPNG_API int spng_get_trns(spng_ctx *ctx, struct spng_trns *trns);
-SPNG_API int spng_get_chrm(spng_ctx *ctx, struct spng_chrm *chrm);
-SPNG_API int spng_get_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int);
-SPNG_API int spng_get_gama(spng_ctx *ctx, double *gamma);
-SPNG_API int spng_get_gama_int(spng_ctx *ctx, uint32_t *gama_int);
-SPNG_API int spng_get_iccp(spng_ctx *ctx, struct spng_iccp *iccp);
-SPNG_API int spng_get_sbit(spng_ctx *ctx, struct spng_sbit *sbit);
-SPNG_API int spng_get_srgb(spng_ctx *ctx, uint8_t *rendering_intent);
-SPNG_API int spng_get_text(spng_ctx *ctx, struct spng_text *text, uint32_t *n_text);
-SPNG_API int spng_get_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd);
-SPNG_API int spng_get_hist(spng_ctx *ctx, struct spng_hist *hist);
-SPNG_API int spng_get_phys(spng_ctx *ctx, struct spng_phys *phys);
-SPNG_API int spng_get_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t *n_splt);
-SPNG_API int spng_get_time(spng_ctx *ctx, struct spng_time *time);
-SPNG_API int spng_get_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t *n_chunks);
-
-/* Official extensions */
-SPNG_API int spng_get_offs(spng_ctx *ctx, struct spng_offs *offs);
-SPNG_API int spng_get_exif(spng_ctx *ctx, struct spng_exif *exif);
-
-
-SPNG_API int spng_set_ihdr(spng_ctx *ctx, struct spng_ihdr *ihdr);
-SPNG_API int spng_set_plte(spng_ctx *ctx, struct spng_plte *plte);
-SPNG_API int spng_set_trns(spng_ctx *ctx, struct spng_trns *trns);
-SPNG_API int spng_set_chrm(spng_ctx *ctx, struct spng_chrm *chrm);
-SPNG_API int spng_set_chrm_int(spng_ctx *ctx, struct spng_chrm_int *chrm_int);
-SPNG_API int spng_set_gama(spng_ctx *ctx, double gamma);
-SPNG_API int spng_set_gama_int(spng_ctx *ctx, uint32_t gamma);
-SPNG_API int spng_set_iccp(spng_ctx *ctx, struct spng_iccp *iccp);
-SPNG_API int spng_set_sbit(spng_ctx *ctx, struct spng_sbit *sbit);
-SPNG_API int spng_set_srgb(spng_ctx *ctx, uint8_t rendering_intent);
-SPNG_API int spng_set_text(spng_ctx *ctx, struct spng_text *text, uint32_t n_text);
-SPNG_API int spng_set_bkgd(spng_ctx *ctx, struct spng_bkgd *bkgd);
-SPNG_API int spng_set_hist(spng_ctx *ctx, struct spng_hist *hist);
-SPNG_API int spng_set_phys(spng_ctx *ctx, struct spng_phys *phys);
-SPNG_API int spng_set_splt(spng_ctx *ctx, struct spng_splt *splt, uint32_t n_splt);
-SPNG_API int spng_set_time(spng_ctx *ctx, struct spng_time *time);
-SPNG_API int spng_set_unknown_chunks(spng_ctx *ctx, struct spng_unknown_chunk *chunks, uint32_t n_chunks);
-
-/* Official extensions */
-SPNG_API int spng_set_offs(spng_ctx *ctx, struct spng_offs *offs);
-SPNG_API int spng_set_exif(spng_ctx *ctx, struct spng_exif *exif);
-
-
-SPNG_API const char *spng_strerror(int err);
-SPNG_API const char *spng_version_string(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* SPNG_H */
diff --git a/cmake/externals/opencv.cmake b/cmake/externals/opencv.cmake
index e3fa7e904..7dc49e6fb 100644
--- a/cmake/externals/opencv.cmake
+++ b/cmake/externals/opencv.cmake
@@ -1,16 +1,171 @@
+set(BUILD_ANDROID_EXAMPLES      OFF CACHE INTERNAL "")
+set(BUILD_ANDROID_PROJECTS      OFF CACHE INTERNAL "")
+set(BUILD_ANDROID_SERVICE       OFF CACHE INTERNAL "")
+set(BUILD_DOCS                  OFF CACHE INTERNAL "")
+set(BUILD_FAT_JAVA_LIB          OFF CACHE INTERNAL "")
+set(BUILD_IPP_IW                OFF CACHE INTERNAL "")
+set(BUILD_ITT                   OFF CACHE INTERNAL "")
+set(BUILD_JASPER                OFF CACHE INTERNAL "")
+set(BUILD_JAVA                  OFF CACHE INTERNAL "")
+set(BUILD_JPEG                  OFF CACHE INTERNAL "")
+set(BUILD_OBJC                  OFF CACHE INTERNAL "")
+set(BUILD_OPENJPEG              OFF CACHE INTERNAL "")
+set(BUILD_PNG                   OFF CACHE INTERNAL "")
+set(BUILD_opencv_apps           OFF CACHE INTERNAL "")
+set(BUILD_opencv_calib3d        OFF CACHE INTERNAL "")
+set(BUILD_opencv_dnn            OFF CACHE INTERNAL "")
+set(BUILD_opencv_features2d     OFF CACHE INTERNAL "")
+set(BUILD_opencv_flann          OFF CACHE INTERNAL "")
+set(BUILD_opencv_gapi           OFF CACHE INTERNAL "")
+set(BUILD_opencv_highgui        OFF CACHE INTERNAL "")
+set(BUILD_opencv_imgcodecs      OFF CACHE INTERNAL "")
+set(BUILD_opencv_java           OFF CACHE INTERNAL "")
+set(BUILD_opencv_js             OFF CACHE INTERNAL "")
+set(BUILD_opencv_ml             OFF CACHE INTERNAL "")
+set(BUILD_opencv_objc           OFF CACHE INTERNAL "")
+set(BUILD_opencv_objdetect      OFF CACHE INTERNAL "")
+set(BUILD_opencv_photo          OFF CACHE INTERNAL "")
+set(BUILD_opencv_python2        OFF CACHE INTERNAL "")
+set(BUILD_opencv_python3        OFF CACHE INTERNAL "")
+set(BUILD_opencv_stitching      OFF CACHE INTERNAL "")
+set(BUILD_opencv_ts             OFF CACHE INTERNAL "")
+set(BUILD_opencv_video          OFF CACHE INTERNAL "")
+set(BUILD_opencv_videoio        OFF CACHE INTERNAL "")
+set(BUILD_opencv_world          OFF CACHE INTERNAL "")
+set(BUILD_OPENEXR               OFF CACHE INTERNAL "")
+set(BUILD_TBB                   OFF CACHE INTERNAL "")
+set(BUILD_TIFF                  OFF CACHE INTERNAL "")
+set(BUILD_WEBP                  OFF CACHE INTERNAL "")
+set(BUILD_WITH_STATIC_CRT       OFF CACHE INTERNAL "")
+if(OCOS_BUILD_APPLE_FRAMEWORK)
+  # tell OpenCV to build zlib so we can link to the static library
+  set(BUILD_ZLIB                ON  CACHE INTERNAL "")
+else()
+  set(BUILD_ZLIB                OFF CACHE INTERNAL "")
+endif()
+set(ENABLE_FAST_MATH            OFF CACHE INTERNAL "")
+set(ENABLE_PRECOMPILED_HEADERS  OFF CACHE INTERNAL "")
+set(WITH_ANDROID_MEDIANDK       OFF CACHE INTERNAL "")
+set(WITH_AVFOUNDATION           OFF CACHE INTERNAL "")
+set(WITH_CAP_IOS                OFF CACHE INTERNAL "")
+set(WITH_CAROTENE               OFF CACHE INTERNAL "")
+set(WITH_CLP                    OFF CACHE INTERNAL "")
+set(WITH_CPUFEATURES            OFF CACHE INTERNAL "")
+set(WITH_DIRECTX                OFF CACHE INTERNAL "")
+set(WITH_DSHOW                  OFF CACHE INTERNAL "")
+set(WITH_EIGEN                  OFF CACHE INTERNAL "")
+set(WITH_FFMPEG                 OFF CACHE INTERNAL "")
+set(WITH_GDCM                   OFF CACHE INTERNAL "")
+set(WITH_GSTREAMER              OFF CACHE INTERNAL "")
+set(WITH_GTK                    OFF CACHE INTERNAL "")
+set(WITH_HALIDE                 OFF CACHE INTERNAL "")
+set(WITH_HPX                    OFF CACHE INTERNAL "")
+set(WITH_IMGCODEC_HDR           OFF CACHE INTERNAL "")
+set(WITH_IMGCODEC_PFM           OFF CACHE INTERNAL "")
+set(WITH_IMGCODEC_PXM           OFF CACHE INTERNAL "")
+set(WITH_IMGCODEC_SUNRASTER     OFF CACHE INTERNAL "")
+set(WITH_INF_ENGINE             OFF CACHE INTERNAL "")
+set(WITH_IPP                    OFF CACHE INTERNAL "")
+set(WITH_ITT                    OFF CACHE INTERNAL "")
+set(WITH_JASPER                 OFF CACHE INTERNAL "")
+set(WITH_JPEG                   OFF CACHE INTERNAL "")
+set(WITH_MSMF                   OFF CACHE INTERNAL "")
+set(WITH_NGRAPH                 OFF CACHE INTERNAL "")
+set(WITH_ONNX                   OFF CACHE INTERNAL "")
+set(WITH_OPENCL                 OFF CACHE INTERNAL "")
+set(WITH_OPENCL_SVM             OFF CACHE INTERNAL "")
+set(WITH_OPENEXR                OFF CACHE INTERNAL "")
+set(WITH_OPENJPEG               OFF CACHE INTERNAL "")
+set(WITH_OPENMP                 OFF CACHE INTERNAL "")
+set(WITH_OPENVX                 OFF CACHE INTERNAL "")
+set(WITH_PNG                    OFF CACHE INTERNAL "")
+set(WITH_PROTOBUF               OFF CACHE INTERNAL "")
+set(WITH_PTHREADS_PF            OFF CACHE INTERNAL "")
+set(WITH_QUIRC                  OFF CACHE INTERNAL "")
+set(WITH_TBB                    OFF CACHE INTERNAL "")
+set(WITH_TENGINE                OFF CACHE INTERNAL "")
+set(WITH_TIFF                   OFF CACHE INTERNAL "")
+set(WITH_V4L                    OFF CACHE INTERNAL "")
+set(WITH_VULKAN                 OFF CACHE INTERNAL "")
+set(WITH_WEBP                   OFF CACHE INTERNAL "")
+set(WITH_WIN32UI                OFF CACHE INTERNAL "")
+
 if (OCOS_ENABLE_OPENCV_CODECS)
-    # Include the subdirectories for libspng and libjpeg
-    add_subdirectory(${PROJECT_SOURCE_DIR}/cmake/externals/libspng)
-    add_subdirectory(${PROJECT_SOURCE_DIR}/cmake/externals/libjpeg)
+  set(BUILD_opencv_imgcodecs  ON CACHE INTERNAL "")
+
+  set(BUILD_JPEG              ON CACHE INTERNAL "")
+  set(BUILD_PNG               ON CACHE INTERNAL "")
+
+  set(WITH_JPEG               ON CACHE INTERNAL "")
+  set(WITH_PNG                ON CACHE INTERNAL "")
+endif()
+
+set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "")
+set(BUILD_DOCS        OFF CACHE INTERNAL "")
+set(BUILD_EXAMPLES    OFF CACHE INTERNAL "")
+set(BUILD_TESTS       OFF CACHE INTERNAL "")
+
+if(IOS)
+  # copy what OpenCV's platforms/ios/build_framework.py does and set CPU_BASELINE=DETECT
+  # https://github.com/opencv/opencv/blob/4223495e6cd67011f86b8ecd9be1fa105018f3b1/platforms/ios/build_framework.py#L253
+  set(CPU_BASELINE DETECT)
+endif()
+
+if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  # error   _png_do_read_transformations in liblibpng.a(pngrtran.c.o) not found for architecture arm64
+  # workaround to disable NEON optimizations
+  add_definitions(-DPNG_ARM_NEON_IMPLEMENTATION=0)
+  add_definitions(-DPNG_ARM_NEON_OPT=0)
+endif()
 
-    # Specify where the static libraries are built
-    set(LIBRARY_OUTPUT_PATH ${CMAKE_CURRENT_BINARY_DIR}/cvout)
+if (MSVC AND CMAKE_GENERATOR_PLATFORM)
+  string(TOLOWER ${CMAKE_GENERATOR_PLATFORM} _GEN_PLATFORM)
+  if (${_GEN_PLATFORM} MATCHES "arm|arm64")
+    set(OPENCV_SKIP_SYSTEM_PROCESSOR_DETECTION ON)
+  endif()
+endif()
 
-    # Add the libraries
-    add_library(libspng STATIC IMPORTED)
-    add_library(libjpeg STATIC IMPORTED)
+FetchContent_Declare(
+    opencv
+    GIT_REPOSITORY https://github.com/opencv/opencv.git
+    SOURCE_DIR opencv_source
+    GIT_TAG        4.5.4
+    GIT_SHALLOW    TRUE
+    -DBUILD_DOCS:BOOL=FALSE
+    -DBUILD_EXAMPLES:BOOL=FALSE
+    -DBUILD_TESTS:BOOL=FALSE
+    -DBUILD_SHARED_LIBS:BOOL=FALSE
+    -DCMAKE_INSTALL_PREFIX:PATH=${CMAKE_CURRENT_BINARY_DIR}/opencv
+    -DCV_TRACE:BOOL=FALSE
+    PATCH_COMMAND git checkout . && git apply --whitespace=fix --ignore-space-change --ignore-whitespace ${CMAKE_CURRENT_SOURCE_DIR}/cmake/externals/opencv-no-rtti.patch
+)
+
+string(TOLOWER "opencv" lcName)
+if(NOT ${lcName}_POPULATED)
+  FetchContent_Populate(opencv)
+
+  # Set variablles to paths of libspng and libjpeg (we only these 2 folders for our vision operators in Extensions)
+  set(libspng ${opencv_source}/3rdparty/libspng/)
+  set(libjpeg ${opencv_source}/3rdparty/libjpeg/)
+
+  # Get a list of all files in the folders
+  file(GLOB_RECURSE LIBSPNG_FILES "${libspng}/*")
+  file(GLOB_RECURSE LIBJPEG_FILES "${libjpeg}/*")
+endif()
+
+set(opencv_INCLUDE_DIRS "")
+list(APPEND opencv_INCLUDE_DIRS ${OPENCV_CONFIG_FILE_INCLUDE_DIR})
+list(APPEND opencv_INCLUDE_DIRS
+    ${OPENCV_MODULE_opencv_core_LOCATION}/include
+    ${OPENCV_MODULE_opencv_imgproc_LOCATION}/include)
+set(opencv_LIBS "")
+list(APPEND opencv_LIBS opencv_core opencv_imgproc)
+
+if (OCOS_ENABLE_OPENCV_CODECS)
+    list(APPEND opencv_INCLUDE_DIRS ${OPENCV_MODULE_opencv_imgcodecs_LOCATION}/include)
+    list(APPEND opencv_LIBS opencv_imgcodecs)
 
-    # Set the properties for the libraries
-    set_property(TARGET libspng PROPERTY IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/cmake/externals/cvout/libspng.a)
-    set_property(TARGET libjpeg PROPERTY IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/cmake/externals/cvout/libjpeg.a)
-endif()
\ No newline at end of file
+    # Add libspng and libjpeg files to our build target
+    list(APPEND opencv_LIBS LIBSPNG_FILES)
+    list(APPEND opencv_LIBS LIBJPEG_FILES)
+endif()

From 065e329a4a5fffb6e958453bb06f5ca8aacb6097 Mon Sep 17 00:00:00 2001
From: Sayan Shaw <sayanshaw@microsoft.com>
Date: Wed, 15 May 2024 16:27:40 -0700
Subject: [PATCH 3/3] readd unset

---
 cmake/externals/opencv.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cmake/externals/opencv.cmake b/cmake/externals/opencv.cmake
index ae1a9883c..470694b2c 100644
--- a/cmake/externals/opencv.cmake
+++ b/cmake/externals/opencv.cmake
@@ -171,3 +171,6 @@ if (OCOS_ENABLE_OPENCV_CODECS)
     list(APPEND opencv_LIBS LIBSPNG_FILES)
     list(APPEND opencv_LIBS LIBJPEG_FILES)
 endif()
+
+# unset it to avoid affecting other projects.
+unset(EXECUTABLE_OUTPUT_PATH CACHE)
\ No newline at end of file