Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some XML validation to avoid xmpsdk bugs (backport #1878) #1880

Merged
merged 5 commits into from
Aug 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,15 @@ target_include_directories(exiv2lib SYSTEM PRIVATE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/xmpsdk/include>
)

if (EXIV2_ENABLE_XMP OR EXIV2_ENABLE_EXTERNAL_XMP)
target_include_directories(exiv2lib PRIVATE ${EXPAT_INCLUDE_DIR})
target_link_libraries(exiv2lib PRIVATE ${EXPAT_LIBRARIES})

if (WIN32)
target_compile_definitions(exiv2lib PRIVATE XML_STATIC)
endif()
endif()

if (EXIV2_ENABLE_XMP)
target_link_libraries(exiv2lib PRIVATE exiv2-xmp)
elseif(EXIV2_ENABLE_EXTERNAL_XMP)
Expand Down
169 changes: 169 additions & 0 deletions src/xmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <algorithm>
#include <cassert>
#include <string>
#include <expat.h>

// Adobe XMP Toolkit
#ifdef EXV_HAVE_XMP_TOOLKIT
Expand All @@ -42,6 +43,173 @@
# include <XMP.incl_cpp>
#endif // EXV_HAVE_XMP_TOOLKIT

#ifdef EXV_HAVE_XMP_TOOLKIT
// This anonymous namespace contains a class named XMLValidator, which uses
// libexpat to do a basic validation check on an XML document. This is to
// reduce the chance of hitting a bug in the (third-party) xmpsdk
// library. For example, it is easy to a trigger a stack overflow in xmpsdk
// with a deeply nested tree.
namespace {
using namespace Exiv2;

class XMLValidator {
size_t element_depth_;
size_t namespace_depth_;

// These fields are used to record whether an error occurred during
// parsing. Why do we need to store the error for later, rather
// than throw an exception immediately? Because expat is a C
// library, so it isn't designed to be able to handle exceptions
// thrown by the callback functions. Throwing exceptions during
// parsing is an example of one of the things that xmpsdk does
// wrong, leading to problems like https://github.com/Exiv2/exiv2/issues/1821.
bool haserror_;
std::string errmsg_;
XML_Size errlinenum_;
XML_Size errcolnum_;

// Very deeply nested XML trees can cause a stack overflow in
// xmpsdk. They are also very unlikely to be valid XMP, so we
// error out if the depth exceeds this limit.
static const size_t max_recursion_limit_ = 1000;

const XML_Parser parser_;

public:
// Runs an XML parser on `buf`. Throws an exception if the XML is invalid.
static void check(const char* buf, size_t buflen) {
XMLValidator validator;
validator.check_internal(buf, buflen);
}

private:
// Private constructor, because this class is only constructed by
// the (static) check method.
XMLValidator() :
element_depth_(0), namespace_depth_(0), haserror_(false),
errlinenum_(0), errcolnum_(0),
parser_(XML_ParserCreateNS(0, '@'))
{
if (!parser_) {
throw Error(kerXMPToolkitError, "Could not create expat parser");
}
}

~XMLValidator() {
XML_ParserFree(parser_);
}

void setError(const char* msg) {
const XML_Size errlinenum = XML_GetCurrentLineNumber(parser_);
const XML_Size errcolnum = XML_GetCurrentColumnNumber(parser_);
#ifndef SUPPRESS_WARNINGS
EXV_INFO << "Invalid XML at line " << errlinenum
<< ", column " << errcolnum
<< ": " << msg << "\n";
#endif
// If this is the first error, then save it.
if (!haserror_) {
haserror_ = true;
errmsg_ = msg;
errlinenum_ = errlinenum;
errcolnum_ = errcolnum;
}
}

void check_internal(const char* buf, size_t buflen) {
if (buflen > static_cast<size_t>(std::numeric_limits<int>::max())) {
throw Error(kerXMPToolkitError, "Buffer length is greater than INT_MAX");
}

XML_SetUserData(parser_, this);
XML_SetElementHandler(parser_, startElement_cb, endElement_cb);
XML_SetNamespaceDeclHandler(parser_, startNamespace_cb, endNamespace_cb);
XML_SetStartDoctypeDeclHandler(parser_, startDTD_cb);

const XML_Status result = XML_Parse(parser_, buf, static_cast<int>(buflen), true);
if (result == XML_STATUS_ERROR) {
setError(XML_ErrorString(XML_GetErrorCode(parser_)));
}

if (haserror_) {
throw XMP_Error(kXMPErr_BadXML, "Error in XMLValidator");
}
}

void startElement(const XML_Char*, const XML_Char**) {
if (element_depth_ > max_recursion_limit_) {
setError("Too deeply nested");
}
++element_depth_;
}

void endElement(const XML_Char*) {
if (element_depth_ > 0) {
--element_depth_;
} else {
setError("Negative depth");
}
}

void startNamespace(const XML_Char*, const XML_Char*) {
if (namespace_depth_ > max_recursion_limit_) {
setError("Too deeply nested");
}
++namespace_depth_;
}

void endNamespace(const XML_Char*) {
if (namespace_depth_ > 0) {
--namespace_depth_;
} else {
setError("Negative depth");
}
}

void startDTD(const XML_Char*, const XML_Char*, const XML_Char*, int) {
// DOCTYPE is used for XXE attacks.
setError("DOCTYPE not supported");
}

// This callback function is called by libexpat. It's a static wrapper
// around startElement().
static void XMLCALL startElement_cb(
void* userData, const XML_Char* name, const XML_Char* *attrs
) {
static_cast<XMLValidator*>(userData)->startElement(name, attrs);
}

// This callback function is called by libexpat. It's a static wrapper
// around endElement().
static void XMLCALL endElement_cb(void* userData, const XML_Char* name) {
static_cast<XMLValidator*>(userData)->endElement(name);
}

// This callback function is called by libexpat. It's a static wrapper
// around startNamespace().
static void XMLCALL startNamespace_cb(
void* userData, const XML_Char* prefix, const XML_Char* uri
) {
static_cast<XMLValidator*>(userData)->startNamespace(prefix, uri);
}

// This callback function is called by libexpat. It's a static wrapper
// around endNamespace().
static void XMLCALL endNamespace_cb(void* userData, const XML_Char* prefix) {
static_cast<XMLValidator*>(userData)->endNamespace(prefix);
}

static void XMLCALL startDTD_cb(
void *userData, const XML_Char *doctypeName, const XML_Char *sysid,
const XML_Char *pubid, int has_internal_subset
) {
static_cast<XMLValidator*>(userData)->startDTD(
doctypeName, sysid, pubid, has_internal_subset);
}
};
} // namespace
#endif // EXV_HAVE_XMP_TOOLKIT

// *****************************************************************************
// local declarations
namespace {
Expand Down Expand Up @@ -597,6 +765,7 @@ namespace Exiv2 {
return 2;
}

XMLValidator::check(xmpPacket.data(), xmpPacket.size());
SXMPMeta meta(xmpPacket.data(), static_cast<XMP_StringLen>(xmpPacket.size()));
SXMPIterator iter(meta);
std::string schemaNs, propPath, propValue;
Expand Down
9 changes: 9 additions & 0 deletions test/data/coverage_xmp_doctype.exv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<!DOCTYPE foo [
<!ELEMENT foo ANY >
<!ENTITY xxe SYSTEM "file:///dev/random" >]>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="XMP Core 4.4.0-Exiv2">
<foo>
</foo>
</x:xmpmeta>
<?xpacket end="w"?>
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_CVE_2017_14857.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class TestCvePoC(metaclass=system_tests.CaseMeta):
Error: Offset of directory Image, entry 0x0132 is out of bounds: Offset = 0x30003030; truncating the entry
Error: Directory Image, entry 0x8649 has invalid size 4294967295*1; skipping entry.
Error: Directory Image, entry 0x8769 Sub-IFD pointer 0 is out of bounds; ignoring it.
Error: XMP Toolkit error 201: XML parsing failure
Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""
]
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_CVE_2017_14858.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class TestCvePoC(metaclass=system_tests.CaseMeta):
Warning: Directory Image, entry 0x0111: Strip 17 is outside of the data area; ignored.
Error: Directory Photo with 8224 entries considered invalid; not read.
Warning: Removing 913 characters from the beginning of the XMP packet
Error: XMP Toolkit error 201: XML parsing failure
Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""
]
Expand Down
20 changes: 20 additions & 0 deletions tests/bugfixes/github/test_coverage_xmp_doctype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-

from system_tests import CaseMeta, path, check_no_ASAN_UBSAN_errors

class coverage_xmp_doctype(metaclass=CaseMeta):
"""
Test added to improve code coverage in xmpsidecar.cpp after
Codecov complained about a lack of code coverage in this PR:
https://github.com/Exiv2/exiv2/pull/1878
"""

filename = path("$data_path/coverage_xmp_doctype.exv")
commands = ["$exiv2 $filename"]
stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
$filename: No Exif data found in the file
"""]
retval = [253]

compare_stdout = check_no_ASAN_UBSAN_errors
7 changes: 3 additions & 4 deletions tests/bugfixes/github/test_issue_1713.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@ class InvalidDateXMP(metaclass=CaseMeta):
commands = ["$exiv2 -Ph $filename"]

stderr = [
"""Warning: Failed to convert Xmp.xmp.CreateDate to Exif.Photo.DateTimeDigitized (Day is out of range)
Exiv2 exception in print action for file $filename:
Xmpdatum::copy: Not supported
"""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""
]
retval = [1]
retval = [0]

def compare_stdout(self, i, command, got_stdout, expected_stdout):
""" We don't care about the stdout, just don't crash """
Expand Down
26 changes: 3 additions & 23 deletions tests/bugfixes/github/test_issue_1819.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,7 @@ class EmptyStringXmpTextValueRead(metaclass=CaseMeta):
File size : 1088 Bytes
MIME type : application/rdf+xml
Image size : 0 x 0
Thumbnail : None
Camera make :
Camera model :
Image timestamp :
File number :
Exposure time :
Aperture :
Exposure bias :
Flash :
Flash bias :
Focal length :
Subject distance:
ISO speed :
Exposure mode :
Metering mode :
Macro mode :
Image quality :
White balance :
Copyright :
Exif comment :
"""]
stderr = [""]
retval = [0]
stderr = ["""$filename: No Exif data found in the file
"""]
retval = [253]
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_issue_428.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class PngReadRawProfile(metaclass=system_tests.CaseMeta):
stderr.append("""$exiv2_exception_message """ + filenames[5] + """:
$kerInputDataReadFailed
""")
stderr.append("""Error: XMP Toolkit error 201: XML parsing failure
stderr.append("""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
""" + stderr_exception(filenames[6]))
stderr.append("""Warning: Failed to decode Exif metadata.
Expand Down
4 changes: 2 additions & 2 deletions tests/bugfixes/github/test_issue_851.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ class DenialOfServiceInAdjustTimeOverflow(metaclass=CaseMeta):
Image size : 0 x 0
"""
]
stderr = [
"""Warning: Failed to convert Xmp.xmp.CreateDate to Exif.Photo.DateTimeDigitized (Day is out of range)
stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
$filename: No Exif data found in the file
"""]
retval = [253]
2 changes: 1 addition & 1 deletion tests/bugfixes/github/test_issue_ghsa_8949_hhfh_j7rj.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Jp2ImageEncodeJp2HeaderOutOfBoundsRead(metaclass=CaseMeta):
commands = ["$exiv2 in $filename1"]
stdout = [""]
stderr = [
"""Error: XMP Toolkit error 201: XML parsing failure
"""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
"""]
retval = [0]
4 changes: 3 additions & 1 deletion tests/bugfixes/github/test_issue_ghsa_v5g7_46xf_h728.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ class Jp2ImageEncodeJp2HeaderOutOfBoundsRead2(metaclass=CaseMeta):
MIME type : application/rdf+xml
Image size : 0 x 0
"""]
stderr = ["""$filename: No Exif data found in the file
stderr = ["""Error: XMP Toolkit error 201: Error in XMLValidator
Warning: Failed to decode XMP metadata.
$filename: No Exif data found in the file
"""]
retval = [253]