Skip to content

Commit

Permalink
feat: XML::Schema and RelaxNG creation accept optional ParseOptions
Browse files Browse the repository at this point in the history
I'm trying out a new pattern: the parsed object carries around the
ParseOptions it was created with, which should make some testing a
bit easier.

I'm also not implementing the "config block" pattern in use for
Documents, because I think the UX is weird and I'm hoping to change
everything to use kwargs in a 2.0 release, anyway.
  • Loading branch information
flavorjones committed Dec 3, 2020
1 parent 025e891 commit 9c87439
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 44 deletions.
11 changes: 9 additions & 2 deletions ext/java/nokogiri/XmlRelaxng.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.jruby.RubyClass;
import org.jruby.anno.JRubyClass;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
Expand All @@ -78,11 +79,17 @@ private void setVerifier(Verifier verifier) {
this.verifier = verifier;
}

static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) {
static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) {
Ruby runtime = context.getRuntime();
XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz);

if (parseOptions == null) {
parseOptions = defaultParseOptions(context.getRuntime());
}

xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray());

xmlRelaxng.setInstanceVariable("@parse_options", parseOptions);

try {
Schema schema = xmlRelaxng.getSchema(source, context);
xmlRelaxng.setVerifier(schema.newVerifier());
Expand Down
47 changes: 34 additions & 13 deletions ext/java/nokogiri/XmlSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,16 @@ private void setValidator(Validator validator) {
this.validator = validator;
}

static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) {
static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) {
Ruby runtime = context.getRuntime();
XmlSchema xmlSchema = (XmlSchema) NokogiriService.XML_SCHEMA_ALLOCATOR.allocate(runtime, klazz);

if (parseOptions == null) {
parseOptions = defaultParseOptions(context.getRuntime());
}

xmlSchema.setInstanceVariable("@errors", runtime.newEmptyArray());
xmlSchema.setInstanceVariable("@parse_options", parseOptions);

try {
SchemaErrorHandler error_handler = new SchemaErrorHandler(context.getRuntime(), (RubyArray)xmlSchema.getInstanceVariable("@errors"));
Expand All @@ -121,14 +127,24 @@ static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, So
}
}

/*
 * Looks up the Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA constant in the
 * given runtime. createSchemaInstance falls back to this value when it is
 * handed a null parseOptions.
 */
protected static IRubyObject defaultParseOptions(Ruby runtime) {
    RubyClass parseOptionsClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::ParseOptions");
    return parseOptionsClass.getConstant("DEFAULT_SCHEMA");
}

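/*
* call-seq:
* from_document(doc, parse_options = ParseOptions::DEFAULT_SCHEMA)
*
* Create a new Schema from the Nokogiri::XML::Document +doc+. The optional
* +parse_options+ is stored on the new schema as @parse_options.
*/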
@JRubyMethod(meta=true)
public static IRubyObject from_document(ThreadContext context, IRubyObject klazz, IRubyObject document) {
@JRubyMethod(meta=true, required=1, optional=1)
public static IRubyObject from_document(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
IRubyObject document = args[0];
IRubyObject parseOptions = null;
if (args.length > 1) {
parseOptions = args[1];
}

XmlDocument doc = ((XmlDocument) ((XmlNode) document).document(context));

RubyArray errors = (RubyArray) doc.getInstanceVariable("@errors");
Expand All @@ -144,25 +160,30 @@ public static IRubyObject from_document(ThreadContext context, IRubyObject klazz
source.setSystemId(uri.convertToString().asJavaString());
}

return getSchema(context, (RubyClass)klazz, source);
return getSchema(context, (RubyClass)klazz, source, parseOptions);
}

private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source) {
/*
 * Create a new schema from the string in args[0]. When a second argument is
 * given it is passed on as the parse options; otherwise null is passed and
 * the callee decides what to use.
 */
@JRubyMethod(meta=true, required=1, optional=1)
public static IRubyObject read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args) {
    IRubyObject parseOptions = args.length > 1 ? args[1] : null;
    String data = args[0].convertToString().asJavaString();
    StreamSource source = new StreamSource(new StringReader(data));
    return getSchema(context, (RubyClass) klazz, source, parseOptions);
}

private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) {
String moduleName = klazz.getName();
if ("Nokogiri::XML::Schema".equals(moduleName)) {
return XmlSchema.createSchemaInstance(context, klazz, source);
return XmlSchema.createSchemaInstance(context, klazz, source, parseOptions);
} else if ("Nokogiri::XML::RelaxNG".equals(moduleName)) {
return XmlRelaxng.createSchemaInstance(context, klazz, source);
return XmlRelaxng.createSchemaInstance(context, klazz, source, parseOptions);
}
return context.getRuntime().getNil();
}

@JRubyMethod(meta=true)
public static IRubyObject read_memory(ThreadContext context, IRubyObject klazz, IRubyObject content) {
String data = content.convertToString().asJavaString();
return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)));
}

@JRubyMethod(visibility=Visibility.PRIVATE)
public IRubyObject validate_document(ThreadContext context, IRubyObject document) {
return validate_document_or_file(context, (XmlDocument)document);
Expand Down
39 changes: 28 additions & 11 deletions ext/nokogiri/xml_relax_ng.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document)
*
* Create a new RelaxNG from the contents of +string+
*/
static VALUE read_memory(VALUE klass, VALUE content)
static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
{
xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt(
(const char *)StringValuePtr(content),
(int)RSTRING_LEN(content)
);
VALUE content;
VALUE parse_options;
xmlRelaxNGParserCtxtPtr ctx;
xmlRelaxNGPtr schema;
VALUE errors = rb_ary_new();
VALUE errors;
VALUE rb_schema;
int scanned_args = 0;

scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
if (scanned_args == 1) {
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
}

ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));

errors = rb_ary_new();
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
Expand Down Expand Up @@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content)

rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
rb_iv_set(rb_schema, "@parse_options", parse_options);

return rb_schema;
}
Expand All @@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
*
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
*/
static VALUE from_document(VALUE klass, VALUE document)
static VALUE from_document(int argc, VALUE *argv, VALUE klass)
{
VALUE document;
VALUE parse_options;
xmlDocPtr doc;
xmlRelaxNGParserCtxtPtr ctx;
xmlRelaxNGPtr schema;
VALUE errors;
VALUE rb_schema;
int scanned_args = 0;

scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);

Data_Get_Struct(document, xmlDoc, doc);
doc = doc->doc; /* In case someone passes us a node. ugh. */

/* In case someone passes us a node. ugh. */
doc = doc->doc;
if (scanned_args == 1) {
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
}

ctx = xmlRelaxNGNewDocParserCtxt(doc);

Expand Down Expand Up @@ -143,6 +159,7 @@ static VALUE from_document(VALUE klass, VALUE document)

rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
rb_iv_set(rb_schema, "@parse_options", parse_options);

return rb_schema;
}
Expand All @@ -156,7 +173,7 @@ void init_xml_relax_ng()

cNokogiriXmlRelaxNG = klass;

rb_define_singleton_method(klass, "read_memory", read_memory, 1);
rb_define_singleton_method(klass, "from_document", from_document, 1);
rb_define_singleton_method(klass, "read_memory", read_memory, -1);
rb_define_singleton_method(klass, "from_document", from_document, -1);
rb_define_private_method(klass, "validate_document", validate_document, 1);
}
46 changes: 34 additions & 12 deletions ext/nokogiri/xml_schema.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,23 +93,34 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
*
* Create a new Schema from the contents of +string+
*/
static VALUE read_memory(VALUE klass, VALUE content)
static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
{
VALUE content;
VALUE parse_options;
int parse_options_int;
xmlSchemaParserCtxtPtr ctx;
xmlSchemaPtr schema;
xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt(
(const char *)StringValuePtr(content),
(int)RSTRING_LEN(content)
);
VALUE errors;
VALUE rb_schema;
VALUE errors = rb_ary_new();
int scanned_args = 0;

scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
if (scanned_args == 1) {
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
}
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));

ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));

errors = rb_ary_new();
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
xmlSchemaSetParserStructuredErrors(
ctx,
Nokogiri_error_array_pusher,
(void *)errors
);
);
#endif

schema = xmlSchemaParse(ctx);
Expand All @@ -129,6 +140,7 @@ static VALUE read_memory(VALUE klass, VALUE content)

rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
rb_iv_set(rb_schema, "@parse_options", parse_options);

return rb_schema;
}
Expand Down Expand Up @@ -164,18 +176,27 @@ static int has_blank_nodes_p(VALUE cache)
*
* Create a new Schema from the Nokogiri::XML::Document +doc+
*/
static VALUE from_document(VALUE klass, VALUE document)
static VALUE from_document(int argc, VALUE *argv, VALUE klass)
{
VALUE document;
VALUE parse_options;
int parse_options_int;
xmlDocPtr doc;
xmlSchemaParserCtxtPtr ctx;
xmlSchemaPtr schema;
VALUE errors;
VALUE rb_schema;
int scanned_args = 0;

scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);

Data_Get_Struct(document, xmlDoc, doc);
doc = doc->doc; /* In case someone passes us a node. ugh. */

/* In case someone passes us a node. ugh. */
doc = doc->doc;
if (scanned_args == 1) {
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
}
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));

if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
Expand Down Expand Up @@ -211,6 +232,7 @@ static VALUE from_document(VALUE klass, VALUE document)

rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
rb_iv_set(rb_schema, "@errors", errors);
rb_iv_set(rb_schema, "@parse_options", parse_options);

return rb_schema;

Expand All @@ -226,8 +248,8 @@ void init_xml_schema()

cNokogiriXmlSchema = klass;

rb_define_singleton_method(klass, "read_memory", read_memory, 1);
rb_define_singleton_method(klass, "from_document", from_document, 1);
rb_define_singleton_method(klass, "read_memory", read_memory, -1);
rb_define_singleton_method(klass, "from_document", from_document, -1);

rb_define_private_method(klass, "validate_document", validate_document, 1);
rb_define_private_method(klass, "validate_file", validate_file, 1);
Expand Down
2 changes: 2 additions & 0 deletions lib/nokogiri/xml/parse_options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ class ParseOptions
DEFAULT_XML = RECOVER | NONET
# the default options used for parsing HTML documents
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
# the default options used for parsing XML schemas
DEFAULT_SCHEMA = NONET

attr_accessor :options
def initialize options = STRICT
Expand Down
4 changes: 2 additions & 2 deletions lib/nokogiri/xml/relax_ng.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ class << self
###
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
# See Nokogiri::XML::RelaxNG for an example.
def RelaxNG string_or_io
RelaxNG.new(string_or_io)
def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
RelaxNG.new(string_or_io, options)
end
end

Expand Down
10 changes: 6 additions & 4 deletions lib/nokogiri/xml/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ class << self
###
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
# object.
def Schema string_or_io
Schema.new(string_or_io)
def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
Schema.new(string_or_io, options)
end
end

Expand All @@ -30,12 +30,14 @@ def Schema string_or_io
class Schema
# Errors while parsing the schema file
attr_accessor :errors
# The Nokogiri::XML::ParseOptions used to parse the schema
attr_accessor :parse_options

###
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
# object. The optional +options+ (default: ParseOptions::DEFAULT_SCHEMA)
# is passed along to from_document.
def self.new string_or_io
from_document Nokogiri::XML(string_or_io)
def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
from_document(Nokogiri::XML(string_or_io), options)
end

###
Expand Down
34 changes: 34 additions & 0 deletions test/xml/test_relax_ng.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,40 @@ def test_parse_with_io
assert_equal 0, xsd.errors.length
end

def test_constructor_method_with_parse_options
  # No options given: the schema is expected to report DEFAULT_SCHEMA.
  default_schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
  assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, default_schema.parse_options)

  # Explicit options: the schema is expected to report an equal ParseOptions.
  recover_schema = Nokogiri::XML::RelaxNG(
    File.read(ADDRESS_SCHEMA_FILE),
    Nokogiri::XML::ParseOptions.new.recover
  )
  assert_equal(Nokogiri::XML::ParseOptions.new.recover, recover_schema.parse_options)
end

def test_new_with_parse_options
  # No options given: the schema is expected to report DEFAULT_SCHEMA.
  default_schema = Nokogiri::XML::RelaxNG.new(File.read(ADDRESS_SCHEMA_FILE))
  assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, default_schema.parse_options)

  # Explicit options: the schema is expected to report an equal ParseOptions.
  recover_schema = Nokogiri::XML::RelaxNG.new(
    File.read(ADDRESS_SCHEMA_FILE),
    Nokogiri::XML::ParseOptions.new.recover
  )
  assert_equal(Nokogiri::XML::ParseOptions.new.recover, recover_schema.parse_options)
end

def test_from_document_with_parse_options
  # No options given: the schema is expected to report DEFAULT_SCHEMA.
  default_doc = Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE))
  default_schema = Nokogiri::XML::RelaxNG.from_document(default_doc)
  assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, default_schema.parse_options)

  # Explicit options: the schema is expected to report an equal ParseOptions.
  recover_doc = Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE))
  recover_schema = Nokogiri::XML::RelaxNG.from_document(recover_doc, Nokogiri::XML::ParseOptions.new.recover)
  assert_equal(Nokogiri::XML::ParseOptions.new.recover, recover_schema.parse_options)
end

def test_read_memory_with_parse_options
  # No options given: the schema is expected to report DEFAULT_SCHEMA.
  default_schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE))
  assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, default_schema.parse_options)

  # Explicit options: the schema is expected to report an equal ParseOptions.
  recover_schema = Nokogiri::XML::RelaxNG.read_memory(
    File.read(ADDRESS_SCHEMA_FILE),
    Nokogiri::XML::ParseOptions.new.recover
  )
  assert_equal(Nokogiri::XML::ParseOptions.new.recover, recover_schema.parse_options)
end

def test_parse_with_errors
xml = File.read(ADDRESS_SCHEMA_FILE).sub(/name="/, 'name=')
assert_raises(Nokogiri::XML::SyntaxError) {
Expand Down
Loading

0 comments on commit 9c87439

Please sign in to comment.