Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Work around a bug in libxml2 #2001

Merged
merged 1 commit into from
Mar 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions ext/nokogiri/xml_schema.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,31 @@ static VALUE read_memory(VALUE klass, VALUE content)
return rb_schema;
}

/*
 * Schema creation will remove and deallocate "blank" nodes. When such a
 * node has already been handed to Ruby, the Ruby VALUE would be left
 * pointing at freed memory. This predicate scans the document's node cache
 * so the caller can refuse (by raising) before that can happen.
 *
 * cache: nil, or a Ruby Array whose entries wrap xmlNode structs.
 *
 * Returns 1 if any cached entry wraps a node that xmlIsBlankNode() reports
 * as blank; returns 0 otherwise, including when cache is nil.
 */
static int has_blank_nodes_p(VALUE cache)
{
  long idx = 0;

  if (!NIL_P(cache)) {
    /* The length is re-read on every pass, as a plain indexed scan would. */
    while (idx < RARRAY_LEN(cache)) {
      xmlNodePtr exposed;
      VALUE wrapper = rb_ary_entry(cache, idx);

      /* Unwrap the Ruby object to reach the underlying libxml2 node. */
      Data_Get_Struct(wrapper, xmlNode, exposed);
      if (xmlIsBlankNode(exposed)) {
        return 1;
      }

      idx++;
    }
  }

  return 0;
}

/*
* call-seq:
* from_document(doc)
Expand All @@ -152,6 +177,10 @@ static VALUE from_document(VALUE klass, VALUE document)
/* In case someone passes us a node. ugh. */
doc = doc->doc;

if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
}

ctx = xmlSchemaNewDocParserCtxt(doc);

errors = rb_ary_new();
Expand Down
28 changes: 28 additions & 0 deletions test/xml/test_schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@ def setup
assert @xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
end

def test_segv
  skip("Pure Java version shouldn't have this bug") unless Nokogiri.uses_libxml?

  # Regression test for a workaround of a LibXML2 bug, tracked upstream at
  # https://gitlab.gnome.org/GNOME/libxml2/issues/148
  # Building a schema can leave dangling pointers behind. That is harmless
  # as long as no node has been exposed to Ruby; once nodes have been
  # exposed, they must not be freed out from under us.
  xsd_source = <<~XSD
    <?xml version="1.0" encoding="UTF-8" ?><xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <xs:element name="foo" type="xs:string"/></xs:schema>
  XSD

  # Safe case: the freshly parsed document has exposed no nodes.
  untouched_doc = Nokogiri::XML(xsd_source)
  assert Nokogiri::XML::Schema.from_document(untouched_doc)

  # Unsafe case: walking the root's children exposes nodes to Ruby,
  # including a blank one.
  touched_doc = Nokogiri::XML(xsd_source)
  node = touched_doc.root.children.find { |child| child.blank? } # Finds a node

  ex = assert_raise(ArgumentError) { Nokogiri::XML::Schema.from_document(touched_doc) }
  assert_match(/blank nodes/, ex.message)
end

def test_schema_from_document
doc = Nokogiri::XML(File.open(PO_SCHEMA_FILE))
assert doc
Expand Down