Skip to content

Commit

Permalink
Work around a bug in libxml2
Browse files Browse the repository at this point in the history
This commit works around a bug in libxml2 where parsing schemas can
result in dangling pointers which can lead to a segv.

Upstream bug is here:  https://gitlab.gnome.org/GNOME/libxml2/issues/148

Fixes #1985
  • Loading branch information
tenderlove committed Feb 26, 2020
1 parent d852d97 commit 74abb4f
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 0 deletions.
29 changes: 29 additions & 0 deletions ext/nokogiri/xml_schema.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,31 @@ static VALUE read_memory(VALUE klass, VALUE content)
return rb_schema;
}

/* Reports whether the given node cache holds at least one "blank" node.
 *
 * Creating a schema removes and deallocates blank nodes from the document.
 * A blank node that has already been exposed to Ruby would then be freed
 * out from under its VALUE, so the caller uses this check to raise an
 * exception instead of building the schema.
 *
 * cache: nil, or a Ruby Array whose entries each wrap an xmlNode.
 * Returns 1 if any entry is blank according to xmlIsBlankNode(), else 0.
 */
static int has_blank_nodes_p(VALUE cache)
{
  long idx = 0;

  /* Nothing to inspect when there is no cache at all. */
  if (NIL_P(cache)) {
    return 0;
  }

  while (idx < RARRAY_LEN(cache)) {
    xmlNodePtr wrapped;
    VALUE entry = rb_ary_entry(cache, idx);

    /* Unwrap the Ruby object to reach the underlying libxml2 node. */
    Data_Get_Struct(entry, xmlNode, wrapped);
    if (xmlIsBlankNode(wrapped)) {
      return 1;
    }

    idx++;
  }

  return 0;
}

/*
* call-seq:
* from_document(doc)
Expand All @@ -152,6 +177,10 @@ static VALUE from_document(VALUE klass, VALUE document)
/* In case someone passes us a node. ugh. */
doc = doc->doc;

if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
}

ctx = xmlSchemaNewDocParserCtxt(doc);

errors = rb_ary_new();
Expand Down
28 changes: 28 additions & 0 deletions test/xml/test_schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@ def setup
assert @xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
end

def test_segv
  unless Nokogiri.uses_libxml?
    skip("Pure Java version shouldn't have this bug")
  end

  # Regression test for the workaround of a libxml2 bug, tracked upstream
  # at https://gitlab.gnome.org/GNOME/libxml2/issues/148
  #
  # Building a schema can leave dangling pointers behind. As long as no
  # node of the document has been handed to Ruby, building the schema is
  # safe. Once nodes have been handed to Ruby, they must not be freed out
  # from under us, so from_document is expected to refuse.
  schema_xml = <<~XML
    <?xml version="1.0" encoding="UTF-8" ?><xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <xs:element name="foo" type="xs:string"/></xs:schema>
  XML

  # Safe case: the document is parsed, but none of its nodes are touched.
  untouched_doc = Nokogiri::XML(schema_xml)
  assert Nokogiri::XML::Schema.from_document(untouched_doc)

  # Unsafe case: looking up a blank child exposes that node to Ruby.
  exposed_doc = Nokogiri::XML(schema_xml)
  blank_node = exposed_doc.root.children.find { |child| child.blank? }

  error = assert_raise(ArgumentError) { Nokogiri::XML::Schema.from_document(exposed_doc) }
  assert_match(/blank nodes/, error.message)
end

def test_schema_from_document
doc = Nokogiri::XML(File.open(PO_SCHEMA_FILE))
assert doc
Expand Down

0 comments on commit 74abb4f

Please sign in to comment.