diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3a300ff98d..c0f0a2d8f3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -173,6 +173,9 @@ target_include_directories(exiv2lib SYSTEM PRIVATE $ ) +target_include_directories(exiv2lib PRIVATE ${EXPAT_INCLUDE_DIR}) +target_link_libraries(exiv2lib PRIVATE EXPAT::EXPAT) + if (EXIV2_ENABLE_XMP) target_link_libraries(exiv2lib PRIVATE exiv2-xmp) elseif(EXIV2_ENABLE_EXTERNAL_XMP) diff --git a/src/xmp.cpp b/src/xmp.cpp index 2e0e9d9182..7146105cab 100644 --- a/src/xmp.cpp +++ b/src/xmp.cpp @@ -30,6 +30,7 @@ #include #include #include +#include // Adobe XMP Toolkit #ifdef EXV_HAVE_XMP_TOOLKIT @@ -42,6 +43,168 @@ # include #endif // EXV_HAVE_XMP_TOOLKIT +// This anonymous namespace contains a class named XMLValidator, which uses +// libexpat to do a basic validation check on an XML document. This is to +// reduce the chance of hitting a bug in the (third-party) xmpsdk +// library. For example, it is easy to a trigger a stack overflow in xmpsdk +// with a deeply nested tree. +namespace { + using namespace Exiv2; + + class XMLValidator { + size_t element_depth_ = 0; + size_t namespace_depth_ = 0; + + // These fields are used to record whether an error occurred during + // parsing. Why do we need to store the error for later, rather + // than throw an exception immediately? Because expat is a C + // library, so it isn't designed to be able to handle exceptions + // thrown by the callback functions. Throwing exceptions during + // parsing is an example of one of the things that xmpsdk does + // wrong, leading to problems like https://github.com/Exiv2/exiv2/issues/1821. + bool haserror_ = false; + std::string errmsg_; + XML_Size errlinenum_ = 0; + XML_Size errcolnum_ = 0; + + // Very deeply nested XML trees can cause a stack overflow in + // xmpsdk. They are also very unlikely to be valid XMP, so we + // error out if the depth exceeds this limit. + static const size_t max_recursion_limit_ = 1000; + + const XML_Parser parser_; + + public: + // Runs an XML parser on `buf`. Throws an exception if the XML is invalid. + static void check(const char* buf, size_t buflen) { + XMLValidator validator; + validator.check_internal(buf, buflen); + } + + private: + // Private constructor, because this class is only constructed by + // the (static) check method. + XMLValidator() : parser_(XML_ParserCreateNS(0, '@')) { + if (!parser_) { + throw Error(kerXMPToolkitError, "Could not create expat parser"); + } + } + + ~XMLValidator() { + XML_ParserFree(parser_); + } + + void setError(const char* msg) { + const XML_Size errlinenum = XML_GetCurrentLineNumber(parser_); + const XML_Size errcolnum = XML_GetCurrentColumnNumber(parser_); +#ifndef SUPPRESS_WARNINGS + EXV_INFO << "Invalid XML at line " << errlinenum + << ", column " << errcolnum + << ": " << msg << "\n"; +#endif + // If this is the first error, then save it. + if (!haserror_) { + haserror_ = true; + errmsg_ = msg; + errlinenum_ = errlinenum; + errcolnum_ = errcolnum; + } + } + + void check_internal(const char* buf, size_t buflen) { + if (buflen > static_cast(std::numeric_limits::max())) { + throw Error(kerXMPToolkitError, "Buffer length is greater than INT_MAX"); + } + + XML_SetUserData(parser_, this); + XML_SetElementHandler(parser_, startElement_cb, endElement_cb); + XML_SetNamespaceDeclHandler(parser_, startNamespace_cb, endNamespace_cb); + XML_SetStartDoctypeDeclHandler(parser_, startDTD_cb); + + const XML_Status result = XML_Parse(parser_, buf, static_cast(buflen), true); + if (result == XML_STATUS_ERROR) { + setError(XML_ErrorString(XML_GetErrorCode(parser_))); + } + + if (haserror_) { + throw XMP_Error(kXMPErr_BadXML, "Error in XMLValidator"); + } + } + + void startElement(const XML_Char*, const XML_Char**) noexcept { + if (element_depth_ > max_recursion_limit_) { + setError("Too deeply nested"); + } + ++element_depth_; + } + + void endElement(const XML_Char*) noexcept { + if (element_depth_ > 0) { + --element_depth_; + } else { + setError("Negative depth"); + } + } + + void startNamespace(const XML_Char*, const XML_Char*) noexcept { + if (namespace_depth_ > max_recursion_limit_) { + setError("Too deeply nested"); + } + ++namespace_depth_; + } + + void endNamespace(const XML_Char*) noexcept { + if (namespace_depth_ > 0) { + --namespace_depth_; + } else { + setError("Negative depth"); + } + } + + void startDTD(const XML_Char*, const XML_Char*, const XML_Char*, int) noexcept { + // DOCTYPE is used for XXE attacks. + setError("DOCTYPE not supported"); + } + + // This callback function is called by libexpat. It's a static wrapper + // around startElement(). + static void XMLCALL startElement_cb( + void* userData, const XML_Char* name, const XML_Char* *attrs + ) noexcept { + static_cast(userData)->startElement(name, attrs); + } + + // This callback function is called by libexpat. It's a static wrapper + // around endElement(). + static void XMLCALL endElement_cb(void* userData, const XML_Char* name) noexcept { + static_cast(userData)->endElement(name); + } + + // This callback function is called by libexpat. It's a static wrapper + // around startNamespace(). + static void XMLCALL startNamespace_cb( + void* userData, const XML_Char* prefix, const XML_Char* uri + ) noexcept { + static_cast(userData)->startNamespace(prefix, uri); + } + + // This callback function is called by libexpat. It's a static wrapper + // around endNamespace(). + static void XMLCALL endNamespace_cb(void* userData, const XML_Char* prefix) noexcept { + static_cast(userData)->endNamespace(prefix); + } + + static void XMLCALL startDTD_cb( + void *userData, const XML_Char *doctypeName, const XML_Char *sysid, + const XML_Char *pubid, int has_internal_subset + ) noexcept { + static_cast(userData)->startDTD( + doctypeName, sysid, pubid, has_internal_subset); + } + }; +} // namespace + + // ***************************************************************************** // local declarations namespace { @@ -601,6 +764,7 @@ namespace Exiv2 { return 2; } + XMLValidator::check(xmpPacket.data(), xmpPacket.size()); SXMPMeta meta(xmpPacket.data(), static_cast(xmpPacket.size())); SXMPIterator iter(meta); std::string schemaNs, propPath, propValue; diff --git a/tests/bugfixes/github/test_CVE_2017_14857.py b/tests/bugfixes/github/test_CVE_2017_14857.py index f7c0fb2abd..f31bfd2d9f 100644 --- a/tests/bugfixes/github/test_CVE_2017_14857.py +++ b/tests/bugfixes/github/test_CVE_2017_14857.py @@ -52,7 +52,7 @@ class TestCvePoC(metaclass=system_tests.CaseMeta): Error: Offset of directory Image, entry 0x0132 is out of bounds: Offset = 0x30003030; truncating the entry Error: Directory Image, entry 0x8649 has invalid size 4294967295*1; skipping entry. Error: Directory Image, entry 0x8769 Sub-IFD pointer 0 is out of bounds; ignoring it. -Error: XMP Toolkit error 201: XML parsing failure +Error: XMP Toolkit error 201: Error in XMLValidator Warning: Failed to decode XMP metadata. """ ] diff --git a/tests/bugfixes/github/test_CVE_2017_14858.py b/tests/bugfixes/github/test_CVE_2017_14858.py index 52610530ba..17c97b5002 100644 --- a/tests/bugfixes/github/test_CVE_2017_14858.py +++ b/tests/bugfixes/github/test_CVE_2017_14858.py @@ -42,7 +42,7 @@ class TestCvePoC(metaclass=system_tests.CaseMeta): Warning: Directory Image, entry 0x0111: Strip 17 is outside of the data area; ignored. Error: Directory Photo with 8224 entries considered invalid; not read. Warning: Removing 913 characters from the beginning of the XMP packet -Error: XMP Toolkit error 201: XML parsing failure +Error: XMP Toolkit error 201: Error in XMLValidator Warning: Failed to decode XMP metadata. """ ] diff --git a/tests/bugfixes/github/test_coverage_xmpsidecar_isXmpType.py b/tests/bugfixes/github/test_coverage_xmpsidecar_isXmpType.py index 24123b2059..3aa44ccd73 100644 --- a/tests/bugfixes/github/test_coverage_xmpsidecar_isXmpType.py +++ b/tests/bugfixes/github/test_coverage_xmpsidecar_isXmpType.py @@ -11,7 +11,7 @@ class coverage_xmpsidecar_isXmpType(metaclass=CaseMeta): filename = path("$data_path/coverage_xmpsidecar_isXmpType.xmp") commands = ["$exiv2 $filename"] - stderr = ["""Error: XMP Toolkit error 201: XML parsing failure + stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator Warning: Failed to decode XMP metadata. $filename: No Exif data found in the file """] diff --git a/tests/bugfixes/github/test_issue_1713.py b/tests/bugfixes/github/test_issue_1713.py index 00fd51b57f..bd39e0058a 100644 --- a/tests/bugfixes/github/test_issue_1713.py +++ b/tests/bugfixes/github/test_issue_1713.py @@ -14,12 +14,11 @@ class InvalidDateXMP(metaclass=CaseMeta): commands = ["$exiv2 -Ph $filename"] stderr = [ -"""Warning: Failed to convert Xmp.xmp.CreateDate to Exif.Photo.DateTimeDigitized (Day is out of range) -Exiv2 exception in print action for file $filename: -Xmpdatum::copy: Not supported +"""Error: XMP Toolkit error 201: Error in XMLValidator +Warning: Failed to decode XMP metadata. """ ] - retval = [1] + retval = [0] def compare_stdout(self, i, command, got_stdout, expected_stdout): """ We don't care about the stdout, just don't crash """ diff --git a/tests/bugfixes/github/test_issue_1819.py b/tests/bugfixes/github/test_issue_1819.py index a2e5d3e681..10f2500cfe 100644 --- a/tests/bugfixes/github/test_issue_1819.py +++ b/tests/bugfixes/github/test_issue_1819.py @@ -15,27 +15,7 @@ class EmptyStringXmpTextValueRead(metaclass=CaseMeta): File size : 1088 Bytes MIME type : application/rdf+xml Image size : 0 x 0 -Thumbnail : None -Camera make : -Camera model : -Image timestamp : -File number : -Exposure time : -Aperture : -Exposure bias : -Flash : -Flash bias : -Focal length : -Subject distance: -ISO speed : -Exposure mode : -Metering mode : -Macro mode : -Image quality : -White balance : -Copyright : -Exif comment : - """] - stderr = [""] - retval = [0] + stderr = ["""$filename: No Exif data found in the file +"""] + retval = [253] diff --git a/tests/bugfixes/github/test_issue_428.py b/tests/bugfixes/github/test_issue_428.py index 24cddd2c9f..9e87562175 100644 --- a/tests/bugfixes/github/test_issue_428.py +++ b/tests/bugfixes/github/test_issue_428.py @@ -29,7 +29,7 @@ class PngReadRawProfile(metaclass=system_tests.CaseMeta): stderr.append("""$exiv2_exception_message """ + filenames[5] + """: $kerInputDataReadFailed """) - stderr.append("""Error: XMP Toolkit error 201: XML parsing failure + stderr.append("""Error: XMP Toolkit error 201: Error in XMLValidator Warning: Failed to decode XMP metadata. """ + stderr_exception(filenames[6])) stderr.append("""Warning: Failed to decode Exif metadata. diff --git a/tests/bugfixes/github/test_issue_851.py b/tests/bugfixes/github/test_issue_851.py index ca575e5e40..0549d29927 100644 --- a/tests/bugfixes/github/test_issue_851.py +++ b/tests/bugfixes/github/test_issue_851.py @@ -24,8 +24,8 @@ class DenialOfServiceInAdjustTimeOverflow(metaclass=CaseMeta): Image size : 0 x 0 """ ] - stderr = [ - """Warning: Failed to convert Xmp.xmp.CreateDate to Exif.Photo.DateTimeDigitized (Day is out of range) + stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator +Warning: Failed to decode XMP metadata. $filename: No Exif data found in the file """] retval = [253] diff --git a/tests/bugfixes/github/test_issue_ghsa_8949_hhfh_j7rj.py b/tests/bugfixes/github/test_issue_ghsa_8949_hhfh_j7rj.py index 44f6a906cb..0b1dc6c48b 100644 --- a/tests/bugfixes/github/test_issue_ghsa_8949_hhfh_j7rj.py +++ b/tests/bugfixes/github/test_issue_ghsa_8949_hhfh_j7rj.py @@ -15,7 +15,7 @@ class Jp2ImageEncodeJp2HeaderOutOfBoundsRead(metaclass=CaseMeta): commands = ["$exiv2 in $filename1"] stdout = [""] stderr = [ -"""Error: XMP Toolkit error 201: XML parsing failure +"""Error: XMP Toolkit error 201: Error in XMLValidator Warning: Failed to decode XMP metadata. """] retval = [0] diff --git a/tests/bugfixes/github/test_issue_ghsa_v5g7_46xf_h728.py b/tests/bugfixes/github/test_issue_ghsa_v5g7_46xf_h728.py index 5f3424f350..b65a58e347 100644 --- a/tests/bugfixes/github/test_issue_ghsa_v5g7_46xf_h728.py +++ b/tests/bugfixes/github/test_issue_ghsa_v5g7_46xf_h728.py @@ -16,6 +16,8 @@ class Jp2ImageEncodeJp2HeaderOutOfBoundsRead2(metaclass=CaseMeta): MIME type : application/rdf+xml Image size : 0 x 0 """] - stderr = ["""$filename: No Exif data found in the file + stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator +Warning: Failed to decode XMP metadata. +$filename: No Exif data found in the file """] retval = [253]