Skip to content

Commit

Permalink
Merge pull request #2174 from sparklemotion/flavorjones-fix-java-deco…
Browse files Browse the repository at this point in the history
…rators

fix: Document#initialize should be called exactly once

---

**What problem is this PR intended to solve?**

Originally this PR was started to address errors in Loofah's test suite on JRuby (see flavorjones/loofah#88) related to Nokogiri object decorators not being applied correctly. The root cause of these errors was that `{XML,HTML}Document#initialize` was not being called in the JRuby implementation. Surprising! And breaks the subclassing behavior that Loofah relies on.

As I erected tests in Nokogiri's suite to make this failure obvious, I uncovered the fact that in CRuby, `Document#initialize` was actually being called twice from the `.parse` method. Even more surprising! But doesn't obviously break anything.

This PR addresses both of these issues, with the result that `Document#initialize` is called exactly once on all platforms.


**Have you included adequate test coverage?**

Yes! Thorough testing is introduced around subclassing `XML::Document`, `HTML::Document`, `XML::DocumentFragment`, and `HTML::DocumentFragment` constructor calls `.new` and `.parse`.


**Does this change affect the behavior of either the C or the Java implementations?**

Yes, but as noted above the changed behavior is now correct and consistent across the platforms.
  • Loading branch information
flavorjones authored Jan 17, 2021
2 parents de426b4 + 30c3a57 commit d79c2d0
Show file tree
Hide file tree
Showing 11 changed files with 2,277 additions and 2,087 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA

---

## next / unreleased

### Fixed

* [CRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was invoked twice on each object.
* [JRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was not called, which was a problem for subclassing such as done by `Loofah`.


## v1.11.1 / 2021-01-06

### Fixed
Expand Down
3 changes: 3 additions & 0 deletions ext/java/nokogiri/internals/HtmlDomParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
Expand Down Expand Up @@ -133,6 +134,8 @@ public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url
protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) {
HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document);
htmlDocument.setDocumentNode(context.runtime, document);
Helpers.invoke(context, htmlDocument, "initialize");

if (ruby_encoding.isNil()) {
// ruby_encoding might have detected by HtmlDocument::EncodingReader
if (detected_encoding != null && !detected_encoding.isNil()) {
Expand Down
2 changes: 2 additions & 0 deletions ext/java/nokogiri/internals/XmlDomParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.jruby.RubyFixnum;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.builtin.IRubyObject;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
Expand Down Expand Up @@ -213,6 +214,7 @@ private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyCl
*/
protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document doc) {
XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc);
Helpers.invoke(context, xmlDocument, "initialize");
xmlDocument.setEncoding(ruby_encoding);

if (options.dtdLoad) {
Expand Down
7 changes: 3 additions & 4 deletions ext/nokogiri/html_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass)
RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
);
rb_doc = Nokogiri_wrap_xml_document(klass, doc);
rb_obj_call_init(rb_doc, argc, argv);
rb_doc = nokogiri_xml_document_wrap_with_init_args(klass, doc, argc, argv);
return rb_doc ;
}

Expand Down Expand Up @@ -81,7 +80,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco
return Qnil;
}

rb_doc = Nokogiri_wrap_xml_document(klass, c_doc);
rb_doc = nokogiri_xml_document_wrap(klass, c_doc);
rb_iv_set(rb_doc, "@errors", rb_error_list);
return rb_doc;
}
Expand Down Expand Up @@ -129,7 +128,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r
return Qnil;
}

rb_doc = Nokogiri_wrap_xml_document(klass, c_doc);
rb_doc = nokogiri_xml_document_wrap(klass, c_doc);
rb_iv_set(rb_doc, "@errors", rb_error_list);
return rb_doc;
}
Expand Down
73 changes: 38 additions & 35 deletions ext/nokogiri/xml_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ static VALUE read_io( VALUE klass,
return Qnil;
}

document = Nokogiri_wrap_xml_document(klass, doc);
document = nokogiri_xml_document_wrap(klass, doc);
rb_iv_set(document, "@errors", error_list);
return document;
}
Expand Down Expand Up @@ -322,7 +322,7 @@ static VALUE read_memory( VALUE klass,
return Qnil;
}

document = Nokogiri_wrap_xml_document(klass, doc);
document = nokogiri_xml_document_wrap(klass, doc);
rb_iv_set(document, "@errors", error_list);
return document;
}
Expand All @@ -339,7 +339,6 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
xmlDocPtr doc, dup;
VALUE copy;
VALUE level;
VALUE error_list;

if(rb_scan_args(argc, argv, "01", &level) == 0)
level = INT2NUM((long)1);
Expand All @@ -351,9 +350,8 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
if(dup == NULL) return Qnil;

dup->type = doc->type;
copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
error_list = rb_iv_get(self, "@errors");
rb_iv_set(copy, "@errors", error_list);
copy = nokogiri_xml_document_wrap(rb_obj_class(self), dup);
rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
return copy ;
}

Expand All @@ -373,8 +371,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
if (NIL_P(version)) version = rb_str_new2("1.0");

doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
rb_doc = Nokogiri_wrap_xml_document(klass, doc);
rb_obj_call_init(rb_doc, argc, argv);
rb_doc = nokogiri_xml_document_wrap_with_init_args(klass, doc, argc, argv);
return rb_doc ;
}

Expand Down Expand Up @@ -564,6 +561,39 @@ static VALUE nokogiri_xml_document_canonicalize(int argc, VALUE* argv, VALUE sel
return rb_funcall(io, rb_intern("string"), 0);
}

VALUE nokogiri_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv)
{
nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));

VALUE rb_doc = Data_Wrap_Struct(
klass ? klass : cNokogiriXmlDocument,
mark,
dealloc,
doc
);

VALUE cache = rb_ary_new();
rb_iv_set(rb_doc, "@decorators", Qnil);
rb_iv_set(rb_doc, "@errors", Qnil);
rb_iv_set(rb_doc, "@node_cache", cache);

tuple->doc = rb_doc;
tuple->unlinkedNodes = st_init_numtable_with_size(128);
tuple->node_cache = cache;
doc->_private = tuple ;

rb_obj_call_init(rb_doc, argc, argv);

return rb_doc ;
}


VALUE nokogiri_xml_document_wrap(VALUE klass, xmlDocPtr doc)
{
return nokogiri_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
}


VALUE cNokogiriXmlDocument ;
void init_xml_document()
{
Expand Down Expand Up @@ -593,30 +623,3 @@ void init_xml_document()
rb_define_method(klass, "create_entity", create_entity, -1);
rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
}


/* this takes klass as a param because it's used for HtmlDocument, too. */
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
{
nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));

VALUE rb_doc = Data_Wrap_Struct(
klass ? klass : cNokogiriXmlDocument,
mark,
dealloc,
doc
);

VALUE cache = rb_ary_new();
rb_iv_set(rb_doc, "@decorators", Qnil);
rb_iv_set(rb_doc, "@node_cache", cache);

tuple->doc = rb_doc;
tuple->unlinkedNodes = st_init_numtable_with_size(128);
tuple->node_cache = cache;
doc->_private = tuple ;

rb_obj_call_init(rb_doc, 0, NULL);

return rb_doc ;
}
3 changes: 2 additions & 1 deletion ext/nokogiri/xml_document.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ typedef struct _nokogiriTuple nokogiriTuple;
typedef nokogiriTuple * nokogiriTuplePtr;

void init_xml_document();
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
VALUE nokogiri_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
VALUE nokogiri_xml_document_wrap(VALUE klass, xmlDocPtr doc);

#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xslt_stylesheet.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ static VALUE transform(int argc, VALUE* argv, VALUE self)
rb_exc_raise(exception);
}

return Nokogiri_wrap_xml_document((VALUE)0, result) ;
return nokogiri_xml_document_wrap((VALUE)0, result) ;
}

static void method_caller(xmlXPathParserContextPtr ctxt, int nargs)
Expand Down
Loading

0 comments on commit d79c2d0

Please sign in to comment.