From 76017be56c89c95e504486991f34a259eae980c2 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 17 Jan 2021 11:35:42 -0500 Subject: [PATCH 1/5] test: expand testing for subclassing {XML,HTML}Document{Fragment} Also update {xml,html}/test_document_fragment.rb with rubocop format and minispec "describe" wrapper. --- test/html/test_document.rb | 1414 +++++++++++----------- test/html/test_document_fragment.rb | 560 +++++---- test/xml/test_document.rb | 1733 ++++++++++++++------------- test/xml/test_document_fragment.rb | 559 +++++---- 4 files changed, 2220 insertions(+), 2046 deletions(-) diff --git a/test/html/test_document.rb b/test/html/test_document.rb index b5b594dab5..29d87cf6d9 100644 --- a/test/html/test_document.rb +++ b/test/html/test_document.rb @@ -4,802 +4,862 @@ module Nokogiri module HTML class TestDocument < Nokogiri::TestCase - let(:html) { Nokogiri::HTML.parse(File.read(HTML_FILE)) } + describe Nokogiri::HTML::Document do + let(:html) { Nokogiri::HTML.parse(File.read(HTML_FILE)) } - def test_nil_css - # Behavior is undefined but shouldn't break - assert(html.css(nil)) - assert(html.xpath(nil)) - end + def test_nil_css + # Behavior is undefined but shouldn't break + assert(html.css(nil)) + assert(html.xpath(nil)) + end - def test_does_not_fail_with_illformatted_html - doc = Nokogiri::HTML('"";'.dup.force_encoding(Encoding::BINARY)) - assert_not_nil(doc) - end + def test_does_not_fail_with_illformatted_html + doc = Nokogiri::HTML('"";'.dup.force_encoding(Encoding::BINARY)) + assert_not_nil(doc) + end - def test_exceptions_remove_newlines - errors = html.errors - assert(errors.length > 0, "has errors") - errors.each do |error| - assert_equal(error.to_s.chomp, error.to_s) + def test_exceptions_remove_newlines + errors = html.errors + assert(errors.length > 0, "has errors") + errors.each do |error| + assert_equal(error.to_s.chomp, error.to_s) + end end - end - def test_fragment - fragment = html.fragment - assert_equal(0, fragment.children.length) - end + def test_fragment + fragment = html.fragment + assert_equal(0, fragment.children.length) + end - def test_document_takes_config_block - options = nil - Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg| - options = cfg - options.nonet.nowarning.dtdattr + def test_document_takes_config_block + options = nil + Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg| + options = cfg + options.nonet.nowarning.dtdattr + end + assert(options.nonet?) + assert(options.nowarning?) + assert(options.dtdattr?) end - assert(options.nonet?) - assert(options.nowarning?) - assert(options.dtdattr?) - end - def test_parse_takes_config_block - options = nil - Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg| - options = cfg - options.nonet.nowarning.dtdattr + def test_parse_takes_config_block + options = nil + Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg| + options = cfg + options.nonet.nowarning.dtdattr + end + assert(options.nonet?) + assert(options.nowarning?) + assert(options.dtdattr?) end - assert(options.nonet?) - assert(options.nowarning?) - assert(options.dtdattr?) - end - def test_subclass - klass = Class.new(Nokogiri::HTML::Document) - doc = klass.new - assert_instance_of(klass, doc) - end + def test_subclass + klass = Class.new(Nokogiri::HTML::Document) + doc = klass.new + assert_instance_of(klass, doc) + end - def test_subclass_initialize - klass = Class.new(Nokogiri::HTML::Document) do - attr_accessor :initialized_with + def test_subclass_initialize + klass = Class.new(Nokogiri::HTML::Document) do + attr_accessor :initialized_with - def initialize(*args) - @initialized_with = args + def initialize(*args) + @initialized_with = args + end end + doc = klass.new("uri", "external_id", 1) + assert_equal(["uri", "external_id", 1], doc.initialized_with) end - doc = klass.new("uri", "external_id", 1) - assert_equal(["uri", "external_id", 1], doc.initialized_with) - end - def test_subclass_dup - klass = Class.new(Nokogiri::HTML::Document) - doc = klass.new.dup - assert_instance_of(klass, doc) - end + def test_subclass_dup + klass = Class.new(Nokogiri::HTML::Document) + doc = klass.new.dup + assert_instance_of(klass, doc) + end - def test_subclass_parse - klass = Class.new(Nokogiri::HTML::Document) - doc = klass.parse(File.read(HTML_FILE)) - assert_equal(html.to_s, doc.to_s) - assert_instance_of(klass, doc) - end + def test_subclass_parse + klass = Class.new(Nokogiri::HTML::Document) + doc = klass.parse(File.read(HTML_FILE)) + assert_equal(html.to_s, doc.to_s) + assert_instance_of(klass, doc) + end - def test_document_parse_method - html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE)) - assert_equal(html.to_s, html.to_s) - end + def test_document_parse_method + html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE)) + assert_equal(html.to_s, html.to_s) + end - def test_document_parse_method_with_url - doc = Nokogiri::HTML("", "http://foobar.example.com/", "UTF-8") - refute_empty(doc.to_s, "Document should not be empty") - assert_equal("http://foobar.example.com/", doc.url) - end + def test_document_parse_method_with_url + doc = Nokogiri::HTML("", "http://foobar.example.com/", "UTF-8") + refute_empty(doc.to_s, "Document should not be empty") + assert_equal("http://foobar.example.com/", doc.url) + end - ### - # Nokogiri::HTML returns an empty Document when given a blank string GH#11 - def test_empty_string_returns_empty_doc - doc = Nokogiri::HTML("") - assert_instance_of(Nokogiri::HTML::Document, doc) - assert_nil(doc.root) - end + ### + # Nokogiri::HTML returns an empty Document when given a blank string GH#11 + def test_empty_string_returns_empty_doc + doc = Nokogiri::HTML("") + assert_instance_of(Nokogiri::HTML::Document, doc) + assert_nil(doc.root) + end + + unless Nokogiri.uses_libxml?("~> 2.6.0") + def test_to_xhtml_with_indent + doc = Nokogiri::HTML("foo") + doc = Nokogiri::HTML(doc.to_xhtml(indent: 2)) + assert_indent(2, doc) + end - unless Nokogiri.uses_libxml?("~> 2.6.0") - def test_to_xhtml_with_indent - doc = Nokogiri::HTML("foo") - doc = Nokogiri::HTML(doc.to_xhtml(indent: 2)) - assert_indent(2, doc) + def test_write_to_xhtml_with_indent + io = StringIO.new + doc = Nokogiri::HTML("foo") + doc.write_xhtml_to(io, indent: 5) + io.rewind + doc = Nokogiri::HTML(io.read) + assert_indent(5, doc) + end end - def test_write_to_xhtml_with_indent - io = StringIO.new - doc = Nokogiri::HTML("foo") - doc.write_xhtml_to(io, indent: 5) - io.rewind - doc = Nokogiri::HTML(io.read) - assert_indent(5, doc) + def test_swap_should_not_exist + assert_raises(NoMethodError) do + html.swap + end end - end - def test_swap_should_not_exist - assert_raises(NoMethodError) do - html.swap + def test_namespace_should_not_exist + assert_raises(NoMethodError) do + html.namespace + end end - end - def test_namespace_should_not_exist - assert_raises(NoMethodError) do - html.namespace + def test_meta_encoding + assert_equal("UTF-8", html.meta_encoding) end - end - def test_meta_encoding - assert_equal("UTF-8", html.meta_encoding) - end + def test_meta_encoding_is_strict_about_http_equiv + doc = Nokogiri::HTML(<<~EOHTML) + + + + + + foo + + + EOHTML + assert_nil(doc.meta_encoding) + end - def test_meta_encoding_is_strict_about_http_equiv - doc = Nokogiri::HTML(<<~EOHTML) - - - - - - foo - - - EOHTML - assert_nil(doc.meta_encoding) - end + def test_meta_encoding_handles_malformed_content_charset + doc = Nokogiri::HTML(<<~EOHTML) + + + + + + foo + + + EOHTML + assert_nil(doc.meta_encoding) + end - def test_meta_encoding_handles_malformed_content_charset - doc = Nokogiri::HTML(<<~EOHTML) - - - - - - foo - - - EOHTML - assert_nil(doc.meta_encoding) - end + def test_meta_encoding_checks_charset + doc = Nokogiri::HTML(<<~EOHTML) + + + + + + foo + + + EOHTML + assert_equal("UTF-8", doc.meta_encoding) + end - def test_meta_encoding_checks_charset - doc = Nokogiri::HTML(<<~EOHTML) - - - - - - foo - - - EOHTML - assert_equal("UTF-8", doc.meta_encoding) - end + def test_meta_encoding= + html.meta_encoding = "EUC-JP" + assert_equal("EUC-JP", html.meta_encoding) + end - def test_meta_encoding= - html.meta_encoding = "EUC-JP" - assert_equal("EUC-JP", html.meta_encoding) - end + def test_title + assert_equal("Tender Lovemaking ", html.title) + doc = Nokogiri::HTML("foo") + assert_nil(doc.title) + end - def test_title - assert_equal("Tender Lovemaking ", html.title) - doc = Nokogiri::HTML("foo") - assert_nil(doc.title) - end + def test_title= + doc = Nokogiri::HTML(<<~EOHTML) + + + old + + + foo + + + EOHTML + doc.title = "new" + assert_equal(1, doc.css("title").size) + assert_equal("new", doc.title) + + doc = Nokogiri::HTML(<<~EOHTML) + + + + + + foo + + + EOHTML + doc.title = "new" + assert_equal("new", doc.title) + title = doc.at("/html/head/title") + assert_not_nil(title) + assert_equal("new", title.text) + assert_equal(-1, doc.at("meta[@http-equiv]") <=> title) + + doc = Nokogiri::HTML(<<~EOHTML) + + + foo + + + EOHTML + doc.title = "new" + assert_equal("new", doc.title) + # may or may not be added + title = doc.at("/html//title") + assert_not_nil(title) + assert_equal("new", title.text) + assert_equal(-1, title <=> doc.at("body")) + + doc = Nokogiri::HTML(<<~EOHTML) + + + + foo + + + EOHTML + doc.title = "new" + assert_equal("new", doc.title) + assert_equal(-1, doc.at("meta[@charset]") <=> doc.at("title")) + assert_equal(-1, doc.at("title") <=> doc.at("body")) + + doc = Nokogiri::HTML("

hello") + doc.title = "new" + assert_equal("new", doc.title) + assert_instance_of(Nokogiri::XML::DTD, doc.children.first) + assert_equal(-1, doc.at("title") <=> doc.at("p")) + + doc = Nokogiri::HTML("") + doc.title = "new" + assert_equal("new", doc.title) + assert_equal("new", doc.at("/html/head/title/text()").to_s) + end - def test_title= - doc = Nokogiri::HTML(<<~EOHTML) - - - old - - - foo - - - EOHTML - doc.title = "new" - assert_equal(1, doc.css("title").size) - assert_equal("new", doc.title) - - doc = Nokogiri::HTML(<<~EOHTML) - - - - - - foo - - - EOHTML - doc.title = "new" - assert_equal("new", doc.title) - title = doc.at("/html/head/title") - assert_not_nil(title) - assert_equal("new", title.text) - assert_equal(-1, doc.at("meta[@http-equiv]") <=> title) - - doc = Nokogiri::HTML(<<~EOHTML) - - - foo - - - EOHTML - doc.title = "new" - assert_equal("new", doc.title) - # may or may not be added - title = doc.at("/html//title") - assert_not_nil(title) - assert_equal("new", title.text) - assert_equal(-1, title <=> doc.at("body")) - - doc = Nokogiri::HTML(<<~EOHTML) - - - - foo - - - EOHTML - doc.title = "new" - assert_equal("new", doc.title) - assert_equal(-1, doc.at("meta[@charset]") <=> doc.at("title")) - assert_equal(-1, doc.at("title") <=> doc.at("body")) - - doc = Nokogiri::HTML("

hello") - doc.title = "new" - assert_equal("new", doc.title) - assert_instance_of(Nokogiri::XML::DTD, doc.children.first) - assert_equal(-1, doc.at("title") <=> doc.at("p")) - - doc = Nokogiri::HTML("") - doc.title = "new" - assert_equal("new", doc.title) - assert_equal("new", doc.at("/html/head/title/text()").to_s) - end + def test_meta_encoding_without_head + encoding = "EUC-JP" + html = Nokogiri::HTML("foo", nil, encoding) - def test_meta_encoding_without_head - encoding = "EUC-JP" - html = Nokogiri::HTML("foo", nil, encoding) + assert_nil(html.meta_encoding) - assert_nil(html.meta_encoding) + html.meta_encoding = encoding + assert_equal(encoding, html.meta_encoding) - html.meta_encoding = encoding - assert_equal(encoding, html.meta_encoding) + meta = html.at("/html/head/meta[@http-equiv and boolean(@content)]") + assert(meta, "meta is in head") - meta = html.at("/html/head/meta[@http-equiv and boolean(@content)]") - assert(meta, "meta is in head") + assert(meta.at("./parent::head/following-sibling::body"), "meta is before body") + end - assert(meta.at("./parent::head/following-sibling::body"), "meta is before body") - end + def test_html5_meta_encoding_without_head + encoding = "EUC-JP" + html = Nokogiri::HTML("foo", nil, encoding) - def test_html5_meta_encoding_without_head - encoding = "EUC-JP" - html = Nokogiri::HTML("foo", nil, encoding) + assert_nil(html.meta_encoding) - assert_nil(html.meta_encoding) + html.meta_encoding = encoding + assert_equal(encoding, html.meta_encoding) - html.meta_encoding = encoding - assert_equal(encoding, html.meta_encoding) + meta = html.at("/html/head/meta[@charset]") + assert(meta, "meta is in head") - meta = html.at("/html/head/meta[@charset]") - assert(meta, "meta is in head") + assert(meta.at("./parent::head/following-sibling::body"), "meta is before body") + end - assert(meta.at("./parent::head/following-sibling::body"), "meta is before body") - end + def test_meta_encoding_with_empty_content_type + html = Nokogiri::HTML(<<~EOHTML) + + + + + + foo + + + EOHTML + assert_nil(html.meta_encoding) + + html = Nokogiri::HTML(<<~EOHTML) + + + + + + foo + + + EOHTML + assert_nil(html.meta_encoding) + end - def test_meta_encoding_with_empty_content_type - html = Nokogiri::HTML(<<~EOHTML) - - - - - - foo - - - EOHTML - assert_nil(html.meta_encoding) - - html = Nokogiri::HTML(<<~EOHTML) - - - - - - foo - - - EOHTML - assert_nil(html.meta_encoding) - end + def test_root_node_parent_is_document + parent = html.root.parent + assert_equal(html, parent) + assert_instance_of(Nokogiri::HTML::Document, parent) + end - def test_root_node_parent_is_document - parent = html.root.parent - assert_equal(html, parent) - assert_instance_of(Nokogiri::HTML::Document, parent) - end + def test_parse_handles_nil_gracefully + @doc = Nokogiri::HTML::Document.parse(nil) + assert_instance_of(Nokogiri::HTML::Document, @doc) + end - def test_parse_handles_nil_gracefully - @doc = Nokogiri::HTML::Document.parse(nil) - assert_instance_of(Nokogiri::HTML::Document, @doc) - end + def test_parse_empty_document + doc = Nokogiri::HTML("\n") + assert_equal(0, doc.css("a").length) + assert_equal(0, doc.xpath("//a").length) + assert_equal(0, doc.search("//a").length) + end - def test_parse_empty_document - doc = Nokogiri::HTML("\n") - assert_equal(0, doc.css("a").length) - assert_equal(0, doc.xpath("//a").length) - assert_equal(0, doc.search("//a").length) - end + def test_HTML_function + html = Nokogiri::HTML(File.read(HTML_FILE)) + assert(html.html?) + end - def test_HTML_function - html = Nokogiri::HTML(File.read(HTML_FILE)) - assert(html.html?) - end + def test_parse_works_with_an_object_that_responds_to_read + klass = Class.new do + def initialize + @contents = StringIO.new("

foo
") + end - def test_parse_works_with_an_object_that_responds_to_read - klass = Class.new do - def initialize - @contents = StringIO.new("
foo
") + def read(*args) + @contents.read(*args) + end end - def read(*args) - @contents.read(*args) - end + doc = Nokogiri::HTML.parse(klass.new) + assert_equal("foo", doc.at_css("div").content) end - doc = Nokogiri::HTML.parse(klass.new) - assert_equal("foo", doc.at_css("div").content) - end - - def test_parse_temp_file - temp_html_file = Tempfile.new("TEMP_HTML_FILE") - File.open(HTML_FILE, "rb") { |f| temp_html_file.write(f.read) } - temp_html_file.close - temp_html_file.open - assert_equal(Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath("//div/a").length, - Nokogiri::HTML.parse(temp_html_file).xpath("//div/a").length) - end - - def test_to_xhtml - assert_match("XHTML", html.to_xhtml) - assert_match("XHTML", html.to_xhtml(encoding: "UTF-8")) - assert_match("UTF-8", html.to_xhtml(encoding: "UTF-8")) - end - - def test_no_xml_header - html = Nokogiri::HTML(<<~EOHTML) - - - EOHTML - assert(html.to_html.length > 0, "html length is too short") - assert_no_match(/^<\?xml/, html.to_html) - end + def test_parse_temp_file + temp_html_file = Tempfile.new("TEMP_HTML_FILE") + File.open(HTML_FILE, "rb") { |f| temp_html_file.write(f.read) } + temp_html_file.close + temp_html_file.open + assert_equal(Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath("//div/a").length, + Nokogiri::HTML.parse(temp_html_file).xpath("//div/a").length) + end - def test_document_has_error - html = Nokogiri::HTML(<<~EOHTML) - - -
- -
-

inside div tag

-
-

outside div tag

- - - EOHTML - set = html.search("div").search("p") - assert_equal(1, set.length) - assert_equal("inside div tag", set.first.inner_text) - end + def test_no_xml_header + html = Nokogiri::HTML(<<~EOHTML) + + + EOHTML + assert(html.to_html.length > 0, "html length is too short") + assert_no_match(/^<\?xml/, html.to_html) + end - def test_multi_css - html = Nokogiri::HTML(<<~EOHTML) - - -
-

p tag

- a tag -
- - - EOHTML - set = html.css("p, a") - assert_equal(2, set.length) - assert_equal(["a tag", "p tag"].sort, set.map(&:content).sort) - end + def test_document_has_error + html = Nokogiri::HTML(<<~EOHTML) + + +
+ +
+

inside div tag

+
+

outside div tag

+ + + EOHTML + set = html.search("div").search("p") + assert_equal(1, set.length) + assert_equal("inside div tag", set.first.inner_text) + end - def test_doc_type - html = Nokogiri::HTML(<<~EOHTML) - - - -

Rainbow Dash

- - - EOHTML - assert_equal("html", html.internal_subset.name) - assert_equal("-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id) - assert_equal("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id) - assert_equal("", html.to_s[0, 97]) - end + def test_multi_css + html = Nokogiri::HTML(<<~EOHTML) + + +
+

p tag

+ a tag +
+ + + EOHTML + set = html.css("p, a") + assert_equal(2, set.length) + assert_equal(["a tag", "p tag"].sort, set.map(&:content).sort) + end - def test_content_size - html = Nokogiri::HTML("
\n
") - assert_equal(1, html.content.size) - assert_equal(1, html.content.split("").size) - assert_equal("\n", html.content) - end + def test_inner_text + html = Nokogiri::HTML(<<~EOHTML) + + +
+

+ Hello world! +

+
+ + + EOHTML + node = html.xpath("//div").first + assert_equal("Hello world!", node.inner_text.strip) + end - def test_find_by_xpath - found = html.xpath("//div/a") - assert_equal(3, found.length) - end + def test_doc_type + html = Nokogiri::HTML(<<~EOHTML) + + + +

Rainbow Dash

+ + + EOHTML + assert_equal("html", html.internal_subset.name) + assert_equal("-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id) + assert_equal("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id) + assert_equal( + "", html.to_s[0, +97] + ) + end - def test_find_by_css - found = html.css("div > a") - assert_equal(3, found.length) - end + def test_content_size + html = Nokogiri::HTML("
\n
") + assert_equal(1, html.content.size) + assert_equal(1, html.content.split("").size) + assert_equal("\n", html.content) + end - def test_find_by_css_with_square_brackets - found = html.css("div[@id='header'] > h1") - found = html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc - assert_equal(1, found.length) - end + def test_find_by_xpath + found = html.xpath("//div/a") + assert_equal(3, found.length) + end - def test_find_by_css_with_escaped_characters - found_without_escape = html.css("div[@id='abc.123']") - found_by_id = html.css('#abc\.123') - found_by_class = html.css('.special\.character') - assert_equal(1, found_without_escape.length) - assert_equal(found_by_id, found_without_escape) - assert_equal(found_by_class, found_without_escape) - end + def test_find_by_css + found = html.css("div > a") + assert_equal(3, found.length) + end - def test_find_with_function - assert(html.css("div:awesome() h1", Class.new do - def awesome(divs) - [divs.first] - end - end.new)) - end + def test_find_by_css_with_square_brackets + found = html.css("div[@id='header'] > h1") + found = html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc + assert_equal(1, found.length) + end - def test_dup_shallow - found = html.search("//div/a").first - dup = found.dup(0) - assert(dup) - assert_equal("", dup.content) - end + def test_find_by_css_with_escaped_characters + found_without_escape = html.css("div[@id='abc.123']") + found_by_id = html.css('#abc\.123') + found_by_class = html.css('.special\.character') + assert_equal(1, found_without_escape.length) + assert_equal(found_by_id, found_without_escape) + assert_equal(found_by_class, found_without_escape) + end - def test_search_can_handle_xpath_and_css - found = html.search("//div/a", "div > p") - length = html.xpath("//div/a").length + - html.css("div > p").length - assert_equal(length, found.length) - end + def test_find_with_function + assert(html.css("div:awesome() h1", Class.new do + def awesome(divs) + [divs.first] + end + end.new)) + end - def test_dup_document - assert(dup = html.dup) - assert_not_equal(dup, html) - assert(html.html?) - assert_instance_of(Nokogiri::HTML::Document, dup) - assert(dup.html?, "duplicate should be html") - assert_equal(html.to_s, dup.to_s) - end + def test_dup_shallow + found = html.search("//div/a").first + dup = found.dup(0) + assert(dup) + assert_equal("", dup.content) + end - def test_dup_document_shallow - assert(dup = html.dup(0)) - assert_not_equal(dup, html) - end + def test_search_can_handle_xpath_and_css + found = html.search("//div/a", "div > p") + length = html.xpath("//div/a").length + + html.css("div > p").length + assert_equal(length, found.length) + end - def test_dup - found = html.search("//div/a").first - dup = found.dup - assert(dup) - assert_equal(found.content, dup.content) - assert_equal(found.document, dup.document) - end + def test_dup_document + assert(dup = html.dup) + assert_not_equal(dup, html) + assert(html.html?) + assert_instance_of(Nokogiri::HTML::Document, dup) + assert(dup.html?, "duplicate should be html") + assert_equal(html.to_s, dup.to_s) + end - # issue 1060 - def test_node_ownership_after_dup - html = "
replace me
" - doc = Nokogiri::HTML::Document.parse(html) - dup = doc.dup - assert_same(dup, dup.at_css("div").document) + def test_dup_document_shallow + assert(dup = html.dup(0)) + assert_not_equal(dup, html) + end - # should not raise an exception - dup.at_css("div").parse("
replaced
") - end + def test_dup + found = html.search("//div/a").first + dup = found.dup + assert(dup) + assert_equal(found.content, dup.content) + assert_equal(found.document, dup.document) + end - def test_inner_html - html = Nokogiri::HTML(<<~EOHTML) - - -
-

- Hello world! -

-
- - - EOHTML - node = html.xpath("//div").first - assert_equal("

Helloworld!

", node.inner_html.gsub(/\s/, "")) - end + # issue 1060 + def test_node_ownership_after_dup + html = "
replace me
" + doc = Nokogiri::HTML::Document.parse(html) + dup = doc.dup + assert_same(dup, dup.at_css("div").document) - def test_round_trip - doc = Nokogiri::HTML(html.inner_html) - assert_equal(html.root.to_html, doc.root.to_html) - end + # should not raise an exception + dup.at_css("div").parse("
replaced
") + end - def test_fragment_contains_text_node - fragment = Nokogiri::HTML.fragment("fooo") - assert_equal(1, fragment.children.length) - assert_equal("fooo", fragment.inner_text) - end + def test_inner_html + html = Nokogiri::HTML(<<~EOHTML) + + +
+

+ Hello world! +

+
+ + + EOHTML + node = html.xpath("//div").first + assert_equal("

Helloworld!

", node.inner_html.gsub(/\s/, "")) + end - def test_fragment_includes_two_tags - assert_equal(2, Nokogiri::HTML.fragment("

").children.length) - end + def test_round_trip + doc = Nokogiri::HTML(html.inner_html) + assert_equal(html.root.to_html, doc.root.to_html) + end - def test_relative_css_finder - doc = Nokogiri::HTML(<<~EOHTML) - - -
-

- inside red -

-
-
-

- inside green -

-
- - - EOHTML - red_divs = doc.css("div.red") - assert_equal(1, red_divs.length) - p_tags = red_divs.first.css("p") - assert_equal(1, p_tags.length) - assert_equal("inside red", p_tags.first.text.strip) - end + def test_fragment_contains_text_node + fragment = Nokogiri::HTML.fragment("fooo") + assert_equal(1, fragment.children.length) + assert_equal("fooo", fragment.inner_text) + end - def test_find_classes - doc = Nokogiri::HTML(<<~EOHTML) - - -

RED

-

RED

-

GREEN

-

GREEN

- - - EOHTML - list = doc.css(".red") - assert_equal(2, list.length) - assert_equal(%w{RED RED}, list.map(&:text)) - end + def test_fragment_includes_two_tags + assert_equal(2, Nokogiri::HTML.fragment("

").children.length) + end - def test_parse_can_take_io - html = nil - File.open(HTML_FILE, "rb") do |f| - html = Nokogiri::HTML(f) + def test_relative_css_finder + doc = Nokogiri::HTML(<<~EOHTML) + + +
+

+ inside red +

+
+
+

+ inside green +

+
+ + + EOHTML + red_divs = doc.css("div.red") + assert_equal(1, red_divs.length) + p_tags = red_divs.first.css("p") + assert_equal(1, p_tags.length) + assert_equal("inside red", p_tags.first.text.strip) end - assert(html.html?) - assert_equal(HTML_FILE, html.url) - end - def test_parse_works_with_an_object_that_responds_to_path - html = String.new("hello") - def html.path - "/i/should/be/the/document/url" + def test_find_classes + doc = Nokogiri::HTML(<<~EOHTML) + + +

RED

+

RED

+

GREEN

+

GREEN

+ + + EOHTML + list = doc.css(".red") + assert_equal(2, list.length) + assert_equal(%w{RED RED}, list.map(&:text)) end - doc = Nokogiri::HTML.parse(html) + def test_parse_can_take_io + html = nil + File.open(HTML_FILE, "rb") do |f| + html = Nokogiri::HTML(f) + end + assert(html.html?) + assert_equal(HTML_FILE, html.url) + end - assert_equal("/i/should/be/the/document/url", doc.url) - end + def test_parse_works_with_an_object_that_responds_to_path + html = String.new("hello") + def html.path + "/i/should/be/the/document/url" + end - # issue #1821, #2110 - def test_parse_can_take_pathnames - assert(File.size(HTML_FILE) > 4096) # file must be big enough to trip the read callback more than once + doc = Nokogiri::HTML.parse(html) - doc = Nokogiri::HTML.parse(Pathname.new(HTML_FILE)) + assert_equal("/i/should/be/the/document/url", doc.url) + end - # an arbitrary assertion on the structure of the document - assert_equal(166, doc.css("a").length) - assert_equal(HTML_FILE, doc.url) - end + # issue #1821, #2110 + def test_parse_can_take_pathnames + assert(File.size(HTML_FILE) > 4096) # file must be big enough to trip the read callback more than once - def test_html? - assert(!html.xml?) - assert(html.html?) - end + doc = Nokogiri::HTML.parse(Pathname.new(HTML_FILE)) - def test_serialize - assert(html.serialize) - assert(html.to_html) - end + # an arbitrary assertion on the structure of the document + assert_equal(166, doc.css("a").length) + assert_equal(HTML_FILE, doc.url) + end - def test_empty_document - # empty document should return "" #699 - assert_equal("", Nokogiri::HTML.parse(nil).text) - assert_equal("", Nokogiri::HTML.parse("").text) - end + def test_html? + assert(!html.xml?) + assert(html.html?) + end - def test_capturing_nonparse_errors_during_document_clone - # see https://github.com/sparklemotion/nokogiri/issues/1196 for background - original = Nokogiri::HTML.parse("
") - original_errors = original.errors.dup + def test_serialize + assert(html.serialize) + assert(html.to_html) + end - copy = original.dup - assert_equal(original_errors, copy.errors) - end + def test_empty_document + # empty document should return "" #699 + assert_equal("", Nokogiri::HTML.parse(nil).text) + assert_equal("", Nokogiri::HTML.parse("").text) + end - def test_capturing_nonparse_errors_during_node_copy_between_docs - # Errors should be emitted while parsing only, and should not change when moving nodes. - doc1 = Nokogiri::HTML("one") - doc2 = Nokogiri::HTML("two") - node1 = doc1.at_css("#unique") - node2 = doc2.at_css("#unique") - original_errors1 = doc1.errors.dup - original_errors2 = doc2.errors.dup - assert(original_errors1.any? { |e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name") - assert(original_errors2.any? { |e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name") - - node1.add_child(node2) - - assert_equal(original_errors1, doc1.errors) - assert_equal(original_errors2, doc2.errors) - end + def test_capturing_nonparse_errors_during_document_clone + # see https://github.com/sparklemotion/nokogiri/issues/1196 for background + original = Nokogiri::HTML.parse("
") + original_errors = original.errors.dup - def test_silencing_nonparse_errors_during_attribute_insertion_1262 - # see https://github.com/sparklemotion/nokogiri/issues/1262 - # - # libxml2 emits a warning when this happens; the JRuby - # implementation does not. so rather than capture the error in - # doc.errors in a platform-dependent way, I'm opting to have - # the error silenced. - # - # So this test doesn't look meaningful, but we want to avoid - # having `ID unique-issue-1262 already defined` emitted to - # stderr when running the test suite. - # - doc = Nokogiri::HTML::Document.new - Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262") - Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262") - assert_equal(0, doc.errors.length) - end + copy = original.dup + assert_equal(original_errors, copy.errors) + end - it "skips encoding for script tags" do - html = Nokogiri::HTML(<<~EOHTML) - - - - - - - EOHTML - node = html.xpath("//script").first - assert_equal("var isGreater = 4 > 5;", node.inner_html) - end + def test_capturing_nonparse_errors_during_node_copy_between_docs + # Errors should be emitted while parsing only, and should not change when moving nodes. + doc1 = Nokogiri::HTML("one") + doc2 = Nokogiri::HTML("two") + node1 = doc1.at_css("#unique") + node2 = doc2.at_css("#unique") + original_errors1 = doc1.errors.dup + original_errors2 = doc2.errors.dup + assert(original_errors1.any? { |e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name") + assert(original_errors2.any? { |e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name") + + node1.add_child(node2) + + assert_equal(original_errors1, doc1.errors) + assert_equal(original_errors2, doc2.errors) + end - it "skips encoding for style tags" do - html = Nokogiri::HTML(<<~EOHTML) - - - - - - - EOHTML - node = html.xpath("//style").first - assert_equal("tr > div { display:block; }", node.inner_html) - end + def test_silencing_nonparse_errors_during_attribute_insertion_1262 + # see https://github.com/sparklemotion/nokogiri/issues/1262 + # + # libxml2 emits a warning when this happens; the JRuby + # implementation does not. so rather than capture the error in + # doc.errors in a platform-dependent way, I'm opting to have + # the error silenced. + # + # So this test doesn't look meaningful, but we want to avoid + # having `ID unique-issue-1262 already defined` emitted to + # stderr when running the test suite. + # + doc = Nokogiri::HTML::Document.new + Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262") + Nokogiri::XML::Element.new("div", doc).set_attribute("id", "unique-issue-1262") + assert_equal(0, doc.errors.length) + end - it "does not fail when converting to_html using explicit encoding" do - html_fragment = <<~EOHTML - Inactive hide details for "User" ---19/05/2015 12:55:29---Provvediamo subito nell’integrare - EOHTML - doc = Nokogiri::HTML(html_fragment, nil, "ISO-8859-1") - html = doc.to_html - assert html.index("src=\"images/icon.gif\"") - assert_equal "ISO-8859-1", html.encoding.name - end + it "skips encoding for script tags" do + html = Nokogiri::HTML(<<~EOHTML) + + + + + + + EOHTML + node = html.xpath("//script").first + assert_equal("var isGreater = 4 > 5;", node.inner_html) + end - def test_leaking_dtd_nodes_after_internal_subset_removal - # see https://github.com/sparklemotion/nokogiri/issues/1784 - # - # just checking that this doesn't raise a valgrind error. we - # don't otherwise have any test coverage for removing DTDs. - # - 100.times do |i| - Nokogiri::HTML::Document.new.internal_subset.remove + it "skips encoding for style tags" do + html = Nokogiri::HTML(<<~EOHTML) + + + + + + + EOHTML + node = html.xpath("//style").first + assert_equal("tr > div { display:block; }", node.inner_html) end - end - describe "HTML::Document.parse" do - let(:html_strict) do - Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).norecover + it "does not fail when converting to_html using explicit encoding" do + html_fragment = <<~EOHTML + Inactive hide details for "User" ---19/05/2015 12:55:29---Provvediamo subito nell’integrare + EOHTML + doc = Nokogiri::HTML(html_fragment, nil, "ISO-8859-1") + html = doc.to_html + assert html.index("src=\"images/icon.gif\"") + assert_equal "ISO-8859-1", html.encoding.name end - it "sets the test up correctly" do - assert(html_strict.strict?) + def test_leaking_dtd_nodes_after_internal_subset_removal + # see https://github.com/sparklemotion/nokogiri/issues/1784 + # + # just checking that this doesn't raise a valgrind error. we + # don't otherwise have any test coverage for removing DTDs. + # + 100.times do |i| + Nokogiri::HTML::Document.new.internal_subset.remove + end end - describe "read memory" do - let(:input) { ", 0) end - assert_match(/Parser without recover option encountered error or warning/, exception.to_s) end end - describe "default options" do - it "does not raise exception on parse error" do - doc = Nokogiri::HTML.parse(input) - assert_operator(doc.errors.length, :>, 0) + describe "read io" do + let(:input) { StringIO.new(", 0) + end end end end - describe "read io" do - let(:input) { StringIO.new(", 0) + describe ".new" do + it "returns an instance of the expected class" do + doc = klass.new + assert_instance_of(klass, doc) + end + + it "calls #initialize exactly once" do + doc = klass.new + assert_equal(1, doc.initialized_count) + end + + it "passes arguments to #initialize" do + doc = klass.new("http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.0 Transitional//EN") + assert_equal(["http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.0 Transitional//EN"], + doc.initialized_with) + end + end + + it "#dup returns the expected class" do + doc = klass.new.dup + assert_instance_of(klass, doc) + end + + describe ".parse" do + it "returns an instance of the expected class" do + doc = klass.parse(File.read(HTML_FILE)) + assert_instance_of(klass, doc) + end + + it "calls #initialize exactly once" do + doc = klass.parse(File.read(HTML_FILE)) + assert_equal(1, doc.initialized_count) + end + + it "parses the doc" do + doc = klass.parse(File.read(HTML_FILE)) + assert_equal(html.root.to_s, doc.root.to_s) end end end diff --git a/test/html/test_document_fragment.rb b/test/html/test_document_fragment.rb index 91da5c2c3a..582093d721 100644 --- a/test/html/test_document_fragment.rb +++ b/test/html/test_document_fragment.rb @@ -1,315 +1,363 @@ # -*- coding: utf-8 -*- +# frozen_string_literal: true require "helper" module Nokogiri module HTML class TestDocumentFragment < Nokogiri::TestCase - def setup - super - @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) - end - - def test_ascii_8bit_encoding - s = String.new 'hello' - s.force_encoding ::Encoding::ASCII_8BIT - assert_equal "hello", Nokogiri::HTML::DocumentFragment.parse(s).to_html - end + describe Nokogiri::HTML::DocumentFragment do + let(:html) { Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) } - def test_inspect_encoding - fragment = "
こんにちは!
".encode('EUC-JP') - f = Nokogiri::HTML::DocumentFragment.parse fragment - assert_equal "こんにちは!", f.content - end - - def test_html_parse_encoding - fragment = "
こんにちは!
".encode 'EUC-JP' - f = Nokogiri::HTML.fragment fragment - assert_equal 'EUC-JP', f.document.encoding - assert_equal "こんにちは!", f.content - end + def test_ascii_8bit_encoding + s = String.new('hello') + s.force_encoding(::Encoding::ASCII_8BIT) + assert_equal("hello", Nokogiri::HTML::DocumentFragment.parse(s).to_html) + end - def test_unlink_empty_document - frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise - assert_nil frag.parent - end + def test_inspect_encoding + fragment = "
こんにちは!
".encode('EUC-JP') + f = Nokogiri::HTML::DocumentFragment.parse(fragment) + assert_equal("こんにちは!", f.content) + end - def test_colons_are_not_removed - doc = Nokogiri::HTML::DocumentFragment.parse("3:30pm") - assert_match(/3:30/, doc.to_s) - end + def test_html_parse_encoding + fragment = "
こんにちは!
".encode('EUC-JP') + f = Nokogiri::HTML.fragment(fragment) + assert_equal('EUC-JP', f.document.encoding) + assert_equal("こんにちは!", f.content) + end - def test_parse_encoding - fragment = "
hello world
" - f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1' - assert_equal 'ISO-8859-1', f.document.encoding - assert_equal "hello world", f.content - end + def test_unlink_empty_document + frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise + assert_nil(frag.parent) + end - def test_html_parse_with_encoding - fragment = "
hello world
" - f = Nokogiri::HTML.fragment fragment, 'ISO-8859-1' - assert_equal 'ISO-8859-1', f.document.encoding - assert_equal "hello world", f.content - end + def test_colons_are_not_removed + doc = Nokogiri::HTML::DocumentFragment.parse("3:30pm") + assert_match(/3:30/, doc.to_s) + end - def test_parse_in_context - assert_equal('
', @html.root.parse('
').to_s) - end + def test_parse_encoding + fragment = "
hello world
" + f = Nokogiri::HTML::DocumentFragment.parse(fragment, 'ISO-8859-1') + assert_equal('ISO-8859-1', f.document.encoding) + assert_equal("hello world", f.content) + end - def test_inner_html= - fragment = Nokogiri::HTML.fragment '
' + def test_html_parse_with_encoding + fragment = "
hello world
" + f = Nokogiri::HTML.fragment(fragment, 'ISO-8859-1') + assert_equal('ISO-8859-1', f.document.encoding) + assert_equal("hello world", f.content) + end - fragment.inner_html = "hello" - assert_equal 'hello', fragment.inner_html - end + def test_parse_in_context + assert_equal('
', html.root.parse('
').to_s) + end - def test_ancestors_search - html = %q{ -
-
    -
  • foo
  • -
-
- } - fragment = Nokogiri::HTML.fragment html - li = fragment.at('li') - assert li.matches?('li') - end + def test_inner_html= + fragment = Nokogiri::HTML.fragment('
') - def test_fun_encoding - string = %Q(こんにちは) - html = Nokogiri::HTML::DocumentFragment.parse( - string - ).to_html(:encoding => 'UTF-8') - assert_equal string, html - end + fragment.inner_html = "hello" + assert_equal('hello', fragment.inner_html) + end - def test_new - assert Nokogiri::HTML::DocumentFragment.new(@html) - end + def test_ancestors_search + html = <<~EOF +
+
    +
  • foo
  • +
+
+ EOF + fragment = Nokogiri::HTML.fragment(html) + li = fragment.at('li') + assert(li.matches?('li')) + end - def test_body_fragment_should_contain_body - fragment = Nokogiri::HTML::DocumentFragment.parse("
foo
") - assert_match(/^/, fragment.to_s) - end + def test_fun_encoding + string = %(こんにちは) + html = Nokogiri::HTML::DocumentFragment.parse( + string + ).to_html(encoding: 'UTF-8') + assert_equal(string, html) + end - def test_nonbody_fragment_should_not_contain_body - fragment = Nokogiri::HTML::DocumentFragment.parse("
foo
") - assert_match(/^
/, fragment.to_s) - end + def test_new + assert(Nokogiri::HTML::DocumentFragment.new(html)) + end - def test_fragment_should_have_document - fragment = Nokogiri::HTML::DocumentFragment.new(@html) - assert_equal @html, fragment.document - end + def test_body_fragment_should_contain_body + fragment = Nokogiri::HTML::DocumentFragment.parse("
foo
") + assert_match(/^/, fragment.to_s) + end - def test_empty_fragment_should_be_searchable_by_css - fragment = Nokogiri::HTML.fragment("") - assert_equal 0, fragment.css("a").size - end + def test_nonbody_fragment_should_not_contain_body + fragment = Nokogiri::HTML::DocumentFragment.parse("
foo
") + assert_match(/^
/, fragment.to_s) + end - def test_empty_fragment_should_be_searchable - fragment = Nokogiri::HTML.fragment("") - assert_equal 0, fragment.search("//a").size - end + def test_fragment_should_have_document + fragment = Nokogiri::HTML::DocumentFragment.new(html) + assert_equal(html, fragment.document) + end - def test_name - fragment = Nokogiri::HTML::DocumentFragment.new(@html) - assert_equal '#document-fragment', fragment.name - end + def test_empty_fragment_should_be_searchable_by_css + fragment = Nokogiri::HTML.fragment("") + assert_equal(0, fragment.css("a").size) + end - def test_static_method - fragment = Nokogiri::HTML::DocumentFragment.parse("
a
") - assert_instance_of Nokogiri::HTML::DocumentFragment, fragment - end + def test_empty_fragment_should_be_searchable + fragment = Nokogiri::HTML.fragment("") + assert_equal(0, fragment.search("//a").size) + end - def test_many_fragments - 100.times { Nokogiri::HTML::DocumentFragment.new(@html) } - end + def test_name + fragment = Nokogiri::HTML::DocumentFragment.new(html) + assert_equal('#document-fragment', fragment.name) + end - def test_subclass - klass = Class.new(Nokogiri::HTML::DocumentFragment) - fragment = klass.new(@html, "
a
") - assert_instance_of klass, fragment - end + def test_static_method + fragment = Nokogiri::HTML::DocumentFragment.parse("
a
") + assert_instance_of(Nokogiri::HTML::DocumentFragment, fragment) + end - def test_subclass_parse - klass = Class.new(Nokogiri::HTML::DocumentFragment) - doc = klass.parse("
a
") - assert_instance_of klass, doc - end + def test_many_fragments + 100.times { Nokogiri::HTML::DocumentFragment.new(html) } + end - def test_html_fragment - fragment = Nokogiri::HTML.fragment("
a
") - assert_equal "
a
", fragment.to_s - end + def test_html_fragment + fragment = Nokogiri::HTML.fragment("
a
") + assert_equal("
a
", fragment.to_s) + end - def test_html_fragment_has_outer_text - doc = "a
b
c" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - if Nokogiri.uses_libxml?("<= 2.6.16") - assert_equal "a
b

c

", fragment.to_s - else - assert_equal "a
b
c", fragment.to_s + def test_html_fragment_has_outer_text + doc = "a
b
c" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + if Nokogiri.uses_libxml?("<= 2.6.16") + assert_equal("a
b

c

", fragment.to_s) + else + assert_equal("a
b
c", fragment.to_s) + end end - end - def test_html_fragment_case_insensitivity - doc = "
b
" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - assert_equal "
b
", fragment.to_s - end + def test_html_fragment_case_insensitivity + doc = "
b
" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + assert_equal("
b
", fragment.to_s) + end - def test_html_fragment_with_leading_whitespace - doc = "
b
" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - assert_match %r%
b
*%, fragment.to_s - end + def test_html_fragment_with_leading_whitespace + doc = "
b
" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + assert_match(%r%
b
*%, fragment.to_s) + end - def test_html_fragment_with_leading_whitespace_and_newline - doc = " \n
b
" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - assert_match %r% \n
b
*%, fragment.to_s - end + def test_html_fragment_with_leading_whitespace_and_newline + doc = " \n
b
" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + assert_match(%r% \n
b
*%, fragment.to_s) + end - def test_html_fragment_with_input_and_intermediate_whitespace - doc = " span" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - assert_equal " span", fragment.to_s - end + def test_html_fragment_with_input_and_intermediate_whitespace + doc = " span" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + assert_equal(" span", fragment.to_s) + end - def test_html_fragment_with_leading_text_and_newline - fragment = HTML::Document.new.fragment("First line\nSecond line
Broken line") - assert_equal fragment.to_s, "First line\nSecond line
Broken line" - end + def test_html_fragment_with_leading_text_and_newline + fragment = HTML::Document.new.fragment("First line\nSecond line
Broken line") + assert_equal(fragment.to_s, "First line\nSecond line
Broken line") + end - def test_html_fragment_with_leading_whitespace_and_text_and_newline - fragment = HTML::Document.new.fragment(" First line\nSecond line
Broken line") - assert_equal " First line\nSecond line
Broken line", fragment.to_s - end + def test_html_fragment_with_leading_whitespace_and_text_and_newline + fragment = HTML::Document.new.fragment(" First line\nSecond line
Broken line") + assert_equal(" First line\nSecond line
Broken line", fragment.to_s) + end - def test_html_fragment_with_leading_entity - failed = ""test
test"" - fragment = Nokogiri::HTML::DocumentFragment.parse(failed) - assert_equal '"test
test"', fragment.to_html - end + def test_html_fragment_with_leading_entity + failed = ""test
test"" + fragment = Nokogiri::HTML::DocumentFragment.parse(failed) + assert_equal('"test
test"', fragment.to_html) + end - def test_to_s - doc = "foo
bar" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - assert_equal "foo
bar", fragment.to_s - end + def test_to_s + doc = "foo
bar" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + assert_equal("foo
bar", fragment.to_s) + end - def test_to_html - doc = "foo
bar" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - assert_equal "foo
bar", fragment.to_html - end + def test_to_html + doc = "foo
bar" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + assert_equal("foo
bar", fragment.to_html) + end - def test_to_xhtml - doc = "foo
bar

" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.7.0") - assert_equal "foo
bar

", fragment.to_xhtml - else - # FIXME: why are we doing this ? this violates the spec, - # see http://www.w3.org/TR/xhtml1/#C_2 - assert_equal "foo
bar

", fragment.to_xhtml + def test_to_xhtml + doc = "foo
bar

" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + if Nokogiri.jruby? || Nokogiri.uses_libxml?(">= 2.7.0") + assert_equal("foo
bar

", fragment.to_xhtml) + else + # FIXME: why are we doing this ? this violates the spec, + # see http://www.w3.org/TR/xhtml1/#C_2 + assert_equal("foo
bar

", fragment.to_xhtml) + end end - end - def test_to_xml - doc = "foo
bar" - fragment = Nokogiri::HTML::Document.new.fragment(doc) - assert_equal "foo
bar", fragment.to_xml - end + def test_to_xml + doc = "foo
bar" + fragment = Nokogiri::HTML::Document.new.fragment(doc) + assert_equal("foo
bar", fragment.to_xml) + end - def test_fragment_script_tag_with_cdata - doc = HTML::Document.new - fragment = doc.fragment("") - assert_equal("", - fragment.to_s) - end + def test_fragment_script_tag_with_cdata + doc = HTML::Document.new + fragment = doc.fragment("") + assert_equal("", + fragment.to_s) + end - def test_fragment_with_comment - doc = HTML::Document.new - fragment = doc.fragment("

hello

") - assert_equal("

hello

", - fragment.to_s) - end + def test_fragment_with_comment + doc = HTML::Document.new + fragment = doc.fragment("

hello

") + assert_equal("

hello

", + fragment.to_s) + end - def test_element_children_counts - if Nokogiri.uses_libxml?("<= 2.9.1") - skip "#elements doesn't work in 2.9.1, see 1793a5a for history" + def test_element_children_counts + if Nokogiri.uses_libxml?("<= 2.9.1") + skip("#elements doesn't work in 2.9.1, see 1793a5a for history") + end + doc = Nokogiri::HTML::DocumentFragment.parse("
\n ") + assert_equal(1, doc.element_children.count) end - doc = Nokogiri::HTML::DocumentFragment.parse("
\n ") - assert_equal 1, doc.element_children.count - end - def test_malformed_fragment_is_corrected - fragment = HTML::DocumentFragment.parse("
") - assert_equal "
", fragment.to_s - end + def test_malformed_fragment_is_corrected + fragment = HTML::DocumentFragment.parse("
") + assert_equal("
", fragment.to_s) + end - def test_unclosed_script_tag - # see GH#315 - fragment = HTML::DocumentFragment.parse("foo ", fragment.to_html - end + def test_unclosed_script_tag + # see GH#315 + fragment = HTML::DocumentFragment.parse("foo ", fragment.to_html) + end - def test_error_propagation_on_fragment_parse - frag = Nokogiri::HTML::DocumentFragment.parse "oh, hello there." - assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be copied to the fragment" - end + def test_error_propagation_on_fragment_parse + frag = Nokogiri::HTML::DocumentFragment.parse("oh, hello there.") + assert(frag.errors.any? { |err| err.to_s =~ /Tag hello invalid/ }, "errors should be copied to the fragment") + end - def test_error_propagation_on_fragment_parse_in_node_context - doc = Nokogiri::HTML::Document.parse "
" - context_node = doc.at_css "div" - frag = Nokogiri::HTML::DocumentFragment.new doc, "oh, hello there.", context_node - assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document" - end + def test_error_propagation_on_fragment_parse_in_node_context + doc = Nokogiri::HTML::Document.parse("
") + context_node = doc.at_css("div") + frag = Nokogiri::HTML::DocumentFragment.new(doc, "oh, hello there.", context_node) + assert(frag.errors.any? do |err| + err.to_s =~ /Tag hello invalid/ + end, "errors should be on the context node's document") + end - def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors - doc = Nokogiri::HTML::Document.parse "
" - assert doc.errors.any?{|err| err.to_s =~ /jimmy/}, "assert on setup" + def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors + doc = Nokogiri::HTML::Document.parse("
") + assert(doc.errors.any? { |err| err.to_s =~ /jimmy/ }, "assert on setup") + + context_node = doc.at_css("div") + frag = Nokogiri::HTML::DocumentFragment.new(doc, "oh, hello there.", context_node) + assert(frag.errors.any? do |err| + err.to_s =~ /Tag hello invalid/ + end, "errors should be on the context node's document") + assert(frag.errors.none? do |err| + err.to_s =~ /jimmy/ + end, "errors should not include pre-existing document errors") + end - context_node = doc.at_css "div" - frag = Nokogiri::HTML::DocumentFragment.new doc, "oh, hello there.", context_node - assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document" - assert frag.errors.none?{|err| err.to_s =~ /jimmy/}, "errors should not include pre-existing document errors" - end + def test_capturing_nonparse_errors_during_fragment_clone + # see https://github.com/sparklemotion/nokogiri/issues/1196 for background + original = Nokogiri::HTML.fragment("
") + original_errors = original.errors.dup - def test_capturing_nonparse_errors_during_fragment_clone - # see https://github.com/sparklemotion/nokogiri/issues/1196 for background - original = Nokogiri::HTML.fragment("
") - original_errors = original.errors.dup + copy = original.dup + assert_equal(original_errors, copy.errors) + end - copy = original.dup - assert_equal original_errors, copy.errors - end + def test_capturing_nonparse_errors_during_node_copy_between_fragments + # Errors should be emitted while parsing only, and should not change when moving nodes. + frag1 = Nokogiri::HTML.fragment("one") + frag2 = Nokogiri::HTML.fragment("two") + node1 = frag1.at_css("#unique") + node2 = frag2.at_css("#unique") + original_errors1 = frag1.errors.dup + original_errors2 = frag2.errors.dup + assert(original_errors1.any? { |e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name") + assert(original_errors2.any? { |e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name") + + node1.add_child(node2) + + assert_equal(original_errors1, frag1.errors) + assert_equal(original_errors2, frag2.errors) + end - def test_capturing_nonparse_errors_during_node_copy_between_fragments - # Errors should be emitted while parsing only, and should not change when moving nodes. - frag1 = Nokogiri::HTML.fragment("one") - frag2 = Nokogiri::HTML.fragment("two") - node1 = frag1.at_css("#unique") - node2 = frag2.at_css("#unique") - original_errors1 = frag1.errors.dup - original_errors2 = frag2.errors.dup - assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name" - assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name" - - node1.add_child node2 - - assert_equal original_errors1, frag1.errors - assert_equal original_errors2, frag2.errors - end + def test_dup_should_create_an_html_document_fragment + # https://github.com/sparklemotion/nokogiri/issues/1846 + original = Nokogiri::HTML::DocumentFragment.parse("

hello

") + duplicate = original.dup + assert_instance_of(Nokogiri::HTML::DocumentFragment, duplicate) + end - def test_dup_should_create_an_html_document_fragment - # https://github.com/sparklemotion/nokogiri/issues/1846 - original = Nokogiri::HTML::DocumentFragment.parse("

hello

") - duplicate = original.dup - assert_instance_of Nokogiri::HTML::DocumentFragment, duplicate + describe "subclassing" do + let(:klass) do + Class.new(Nokogiri::HTML::DocumentFragment) do + attr_accessor :initialized_with, :initialized_count + + def initialize(*args) + super + @initialized_with = args + @initialized_count ||= 0 + @initialized_count += 1 + end + end + end + + describe ".new" do + it "returns an instance of the right class" do + fragment = klass.new(html, "
a
") + assert_instance_of(klass, fragment) + end + + it "calls #initialize exactly once" do + fragment = klass.new(html, "
a
") + assert_equal(1, fragment.initialized_count) + end + + it "passes args to #initialize" do + fragment = klass.new(html, "
a
") + assert_equal([html, "
a
"], fragment.initialized_with) + end + end + + it "#dup returns the expected class" do + doc = klass.new(html, "
a
").dup + assert_instance_of(klass, doc) + end + + describe ".parse" do + it "returns an instance of the right class" do + fragment = klass.parse("
a
") + assert_instance_of(klass, fragment) + end + + it "calls #initialize exactly once" do + fragment = klass.parse("
a
") + assert_equal(1, fragment.initialized_count) + end + + it "passes the fragment" do + fragment = klass.parse("
a
") + assert_equal(Nokogiri::HTML::DocumentFragment.parse("
a
").to_s, fragment.to_s) + end + end + end end end end diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index 85892e184d..8d69f333db 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require "helper" require "uri" @@ -5,758 +6,727 @@ module Nokogiri module XML class TestDocument < Nokogiri::TestCase - URI = if URI.const_defined?(:DEFAULT_PARSER) - ::URI::DEFAULT_PARSER - else - ::URI - end + describe Nokogiri::XML::Document do + URI = if URI.const_defined?(:DEFAULT_PARSER) + ::URI::DEFAULT_PARSER + else + ::URI + end - def setup - super - @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) - end + let(:xml) { Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) } - def test_dtd_with_empty_internal_subset - doc = Nokogiri::XML <<-eoxml - - - - - eoxml - assert doc.root - end + def test_dtd_with_empty_internal_subset + doc = Nokogiri::XML(<<~eoxml) + + + + + eoxml + assert(doc.root) + end - # issue #1005 - def test_strict_parsing_empty_doc_should_raise_exception - ["", " "].each do |empty_string| - assert_raises(SyntaxError, "empty string '#{empty_string}' should raise a SyntaxError") do - Nokogiri::XML(empty_string) { |c| c.strict } - end - assert_raises(SyntaxError, "StringIO of '#{empty_string}' should raise a SyntaxError") do - Nokogiri::XML(StringIO.new(empty_string)) { |c| c.strict } + # issue #1005 + def test_strict_parsing_empty_doc_should_raise_exception + ["", " "].each do |empty_string| + assert_raises(SyntaxError, "empty string '#{empty_string}' should raise a SyntaxError") do + Nokogiri::XML(empty_string) { |c| c.strict } + end + assert_raises(SyntaxError, "StringIO of '#{empty_string}' should raise a SyntaxError") do + Nokogiri::XML(StringIO.new(empty_string)) { |c| c.strict } + end end end - end - # issue #838 - def test_document_with_invalid_prolog - doc = Nokogiri::XML "" - assert_empty doc.content - end + # issue #838 + def test_document_with_invalid_prolog + doc = Nokogiri::XML("") + assert_empty(doc.content) + end - # issue #837 - def test_document_with_refentity - doc = Nokogiri::XML "&" - assert_equal "", doc.content - end + # issue #837 + def test_document_with_refentity + doc = Nokogiri::XML("&") + assert_equal("", doc.content) + end - # issue 1060 - def test_node_ownership_after_dup - html = "
replace me
" - doc = Nokogiri::XML::Document.parse(html) - dup = doc.dup - assert_same dup, dup.at_css("div").document + # issue 1060 + def test_node_ownership_after_dup + html = "
replace me
" + doc = Nokogiri::XML::Document.parse(html) + dup = doc.dup + assert_same(dup, dup.at_css("div").document) - # should not raise an exception - dup.at_css("div").parse("
replaced
") - end + # should not raise an exception + dup.at_css("div").parse("
replaced
") + end - # issue #835 - def test_manually_adding_reference_entities - d = Nokogiri::XML::Document.new - root = Nokogiri::XML::Element.new("bar", d) - txt = Nokogiri::XML::Text.new("foo", d) - ent = Nokogiri::XML::EntityReference.new(d, "#8217") - root << txt - root << ent - d << root - assert_match(/’/, d.to_html) - end + # issue #835 + def test_manually_adding_reference_entities + d = Nokogiri::XML::Document.new + root = Nokogiri::XML::Element.new("bar", d) + txt = Nokogiri::XML::Text.new("foo", d) + ent = Nokogiri::XML::EntityReference.new(d, "#8217") + root << txt + root << ent + d << root + assert_match(/’/, d.to_html) + end - def test_document_with_initial_space - doc = Nokogiri::XML(" ") - assert_equal 2, doc.children.size - end + def test_document_with_initial_space + doc = Nokogiri::XML(" ") + assert_equal(2, doc.children.size) + end - def test_root_set_to_nil - @xml.root = nil - assert_nil @xml.root - end + def test_root_set_to_nil + xml.root = nil + assert_nil(xml.root) + end - def test_million_laugh_attach - doc = Nokogiri::XML ' - - - - - - - - - -]> -&lol9;' - assert_not_nil doc - end + def test_million_laugh_attach + doc = Nokogiri::XML(<<~EOF) + + + + + + + + + + + ]> + &lol9; + EOF + assert_not_nil(doc) + end - def test_million_laugh_attach_2 - doc = Nokogiri::XML ' - - - - - - - - ]> - - &a; - ' - assert_not_nil doc - end + def test_million_laugh_attach_2 + doc = Nokogiri::XML(<<~EOF) + + + + + + + + + ]> + + &a; + + EOF + assert_not_nil(doc) + end - def test_ignore_unknown_namespace - doc = Nokogiri::XML(<<-eoxml) - - - - - eoxml - if Nokogiri.jruby? - refute doc.xpath("//foo").first.namespace # assert that the namespace is nil + def test_ignore_unknown_namespace + doc = Nokogiri::XML(<<~eoxml) + + + + + eoxml + if Nokogiri.jruby? + refute(doc.xpath("//foo").first.namespace) # assert that the namespace is nil + end + refute_empty(doc.xpath("//bar"), "bar wasn't found in the document") # bar should be part of the doc end - refute_empty doc.xpath("//bar"), "bar wasn't found in the document" # bar should be part of the doc - end - def test_collect_namespaces - doc = Nokogiri::XML(<<-eoxml) - - - - - - eoxml - assert_equal({ "xmlns" => "hello", "xmlns:foo" => "world" }, - doc.collect_namespaces) - end + def test_collect_namespaces + doc = Nokogiri::XML(<<~eoxml) + + + + + + eoxml + assert_equal({ "xmlns" => "hello", "xmlns:foo" => "world" }, + doc.collect_namespaces) + end - def test_subclass_initialize_modify # testing a segv - Class.new(Nokogiri::XML::Document) { - def initialize - super - body_node = Nokogiri::XML::Node.new "body", self - body_node.content = "stuff" - self.root = body_node - end - }.new - end + def test_subclass_initialize_modify # testing a segv + Class.new(Nokogiri::XML::Document) do + def initialize + super + body_node = Nokogiri::XML::Node.new("body", self) + body_node.content = "stuff" + self.root = body_node + end + end.new + end - def test_create_text_node - txt = @xml.create_text_node "foo" - assert_instance_of Nokogiri::XML::Text, txt - assert_equal "foo", txt.text - assert_equal @xml, txt.document - end + def test_create_text_node + txt = xml.create_text_node("foo") + assert_instance_of(Nokogiri::XML::Text, txt) + assert_equal("foo", txt.text) + assert_equal(xml, txt.document) + end - def test_create_text_node_with_block - @xml.create_text_node "foo" do |txt| - assert_instance_of Nokogiri::XML::Text, txt - assert_equal "foo", txt.text - assert_equal @xml, txt.document + def test_create_text_node_with_block + xml.create_text_node("foo") do |txt| + assert_instance_of(Nokogiri::XML::Text, txt) + assert_equal("foo", txt.text) + assert_equal(xml, txt.document) + end end - end - def test_create_element - elm = @xml.create_element("foo") - assert_instance_of Nokogiri::XML::Element, elm - assert_equal "foo", elm.name - assert_equal @xml, elm.document - end + def test_create_element + elm = xml.create_element("foo") + assert_instance_of(Nokogiri::XML::Element, elm) + assert_equal("foo", elm.name) + assert_equal(xml, elm.document) + end - def test_create_element_with_block - @xml.create_element("foo") do |elm| - assert_instance_of Nokogiri::XML::Element, elm - assert_equal "foo", elm.name - assert_equal @xml, elm.document + def test_create_element_with_block + xml.create_element("foo") do |elm| + assert_instance_of(Nokogiri::XML::Element, elm) + assert_equal("foo", elm.name) + assert_equal(xml, elm.document) + end end - end - def test_create_element_with_attributes - elm = @xml.create_element("foo", :a => "1") - assert_instance_of Nokogiri::XML::Element, elm - assert_instance_of Nokogiri::XML::Attr, elm.attributes["a"] - assert_equal "1", elm["a"] - end + def test_create_element_with_attributes + elm = xml.create_element("foo", a: "1") + assert_instance_of(Nokogiri::XML::Element, elm) + assert_instance_of(Nokogiri::XML::Attr, elm.attributes["a"]) + assert_equal("1", elm["a"]) + end - def test_create_element_with_namespace - elm = @xml.create_element("foo", :'xmlns:foo' => "http://tenderlovemaking.com") - assert_equal "http://tenderlovemaking.com", elm.namespaces["xmlns:foo"] - end + def test_create_element_with_namespace + elm = xml.create_element("foo", 'xmlns:foo': "http://tenderlovemaking.com") + assert_equal("http://tenderlovemaking.com", elm.namespaces["xmlns:foo"]) + end - def test_create_element_with_hyphenated_namespace - elm = @xml.create_element("foo", :'xmlns:SOAP-ENC' => "http://tenderlovemaking.com") - assert_equal "http://tenderlovemaking.com", elm.namespaces["xmlns:SOAP-ENC"] - end + def test_create_element_with_hyphenated_namespace + elm = xml.create_element("foo", 'xmlns:SOAP-ENC': "http://tenderlovemaking.com") + assert_equal("http://tenderlovemaking.com", elm.namespaces["xmlns:SOAP-ENC"]) + end - def test_create_element_with_content - elm = @xml.create_element("foo", "needs more xml/violence") - assert_equal "needs more xml/violence", elm.content - end + def test_create_element_with_content + elm = xml.create_element("foo", "needs more xml/violence") + assert_equal("needs more xml/violence", elm.content) + end - def test_create_cdata - cdata = @xml.create_cdata("abc") - assert_instance_of Nokogiri::XML::CDATA, cdata - assert_equal "abc", cdata.content - end + def test_create_cdata + cdata = xml.create_cdata("abc") + assert_instance_of(Nokogiri::XML::CDATA, cdata) + assert_equal("abc", cdata.content) + end - def test_create_cdata_with_block - @xml.create_cdata("abc") do |cdata| - assert_instance_of Nokogiri::XML::CDATA, cdata - assert_equal "abc", cdata.content + def test_create_cdata_with_block + xml.create_cdata("abc") do |cdata| + assert_instance_of(Nokogiri::XML::CDATA, cdata) + assert_equal("abc", cdata.content) + end end - end - def test_create_comment - comment = @xml.create_comment("abc") - assert_instance_of Nokogiri::XML::Comment, comment - assert_equal "abc", comment.content - end + def test_create_comment + comment = xml.create_comment("abc") + assert_instance_of(Nokogiri::XML::Comment, comment) + assert_equal("abc", comment.content) + end - def test_create_comment_with_block - @xml.create_comment("abc") do |comment| - assert_instance_of Nokogiri::XML::Comment, comment - assert_equal "abc", comment.content + def test_create_comment_with_block + xml.create_comment("abc") do |comment| + assert_instance_of(Nokogiri::XML::Comment, comment) + assert_equal("abc", comment.content) + end end - end - def test_pp - out = StringIO.new(String.new) - ::PP.pp @xml, out - assert_operator out.string.length, :>, 0 - end + def test_pp + out = StringIO.new(String.new) + ::PP.pp(xml, out) + assert_operator(out.string.length, :>, 0) + end - def test_create_internal_subset_on_existing_subset - assert_not_nil @xml.internal_subset - assert_raises(RuntimeError) do - @xml.create_internal_subset("staff", nil, "staff.dtd") + def test_create_internal_subset_on_existing_subset + assert_not_nil(xml.internal_subset) + assert_raises(RuntimeError) do + xml.create_internal_subset("staff", nil, "staff.dtd") + end end - end - def test_create_internal_subset - xml = Nokogiri::XML("") - assert_nil xml.internal_subset + def test_create_internal_subset + xml = Nokogiri::XML("") + assert_nil(xml.internal_subset) - xml.create_internal_subset("name", nil, "staff.dtd") - ss = xml.internal_subset - assert_equal "name", ss.name - assert_nil ss.external_id - assert_equal "staff.dtd", ss.system_id - end + xml.create_internal_subset("name", nil, "staff.dtd") + ss = xml.internal_subset + assert_equal("name", ss.name) + assert_nil(ss.external_id) + assert_equal("staff.dtd", ss.system_id) + end - def test_external_subset - assert_nil @xml.external_subset - Dir.chdir(ASSETS_DIR) do - @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| - cfg.dtdload - } + def test_external_subset + assert_nil(xml.external_subset) + xml = Dir.chdir(ASSETS_DIR) do + Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) do |cfg| + cfg.dtdload + end + end + assert(xml.external_subset) end - assert @xml.external_subset - end - def test_create_external_subset_fails_with_existing_subset - assert_nil @xml.external_subset - Dir.chdir(ASSETS_DIR) do - @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| - cfg.dtdload - } + def test_create_external_subset_fails_with_existing_subset + assert_nil(xml.external_subset) + xml = Dir.chdir(ASSETS_DIR) do + Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) do |cfg| + cfg.dtdload + end + end + assert(xml.external_subset) + + assert_raises(RuntimeError) do + xml.create_external_subset("staff", nil, "staff.dtd") + end end - assert @xml.external_subset - assert_raises(RuntimeError) do - @xml.create_external_subset("staff", nil, "staff.dtd") + def test_create_external_subset + dtd = xml.create_external_subset("staff", nil, "staff.dtd") + assert_nil(dtd.external_id) + assert_equal("staff.dtd", dtd.system_id) + assert_equal("staff", dtd.name) + assert_equal(dtd, xml.external_subset) end - end - def test_create_external_subset - dtd = @xml.create_external_subset("staff", nil, "staff.dtd") - assert_nil dtd.external_id - assert_equal "staff.dtd", dtd.system_id - assert_equal "staff", dtd.name - assert_equal dtd, @xml.external_subset - end + def test_version + assert_equal("1.0", xml.version) + end - def test_version - assert_equal "1.0", @xml.version - end + def test_add_namespace + assert_raise(NoMethodError) do + xml.add_namespace("foo", "bar") + end + end - def test_add_namespace - assert_raise NoMethodError do - @xml.add_namespace("foo", "bar") + def test_attributes + assert_raise(NoMethodError) do + xml.attributes + end end - end - def test_attributes - assert_raise NoMethodError do - @xml.attributes + def test_namespace + assert_raise(NoMethodError) do + xml.namespace + end end - end - def test_namespace - assert_raise NoMethodError do - @xml.namespace + def test_namespace_definitions + assert_raise(NoMethodError) do + xml.namespace_definitions + end end - end - def test_namespace_definitions - assert_raise NoMethodError do - @xml.namespace_definitions + def test_line + assert_raise(NoMethodError) do + xml.line + end end - end - def test_line - assert_raise NoMethodError do - @xml.line + def test_empty_node_converted_to_html_is_not_self_closing + doc = Nokogiri::XML("") + assert_equal("", doc.inner_html) end - end - def test_empty_node_converted_to_html_is_not_self_closing - doc = Nokogiri::XML("") - assert_equal "", doc.inner_html - end + def test_fragment + fragment = xml.fragment + assert_equal(0, fragment.children.length) + end - def test_fragment - fragment = @xml.fragment - assert_equal 0, fragment.children.length - end + def test_add_child_fragment_with_single_node + doc = Nokogiri::XML::Document.new + fragment = doc.fragment("") + doc.add_child(fragment) + assert_equal("/hello", doc.at("//hello").path) + assert_equal("hello", doc.root.name) + end - def test_add_child_fragment_with_single_node - doc = Nokogiri::XML::Document.new - fragment = doc.fragment("") - doc.add_child fragment - assert_equal "/hello", doc.at("//hello").path - assert_equal "hello", doc.root.name - end + def test_add_child_fragment_with_multiple_nodes + doc = Nokogiri::XML::Document.new + fragment = doc.fragment("") + assert_raises(RuntimeError) do + doc.add_child(fragment) + end + end - def test_add_child_fragment_with_multiple_nodes - doc = Nokogiri::XML::Document.new - fragment = doc.fragment("") - assert_raises(RuntimeError) do - doc.add_child fragment + def test_add_child_with_multiple_roots + assert_raises(RuntimeError) do + xml << Node.new("foo", xml) + end end - end - def test_add_child_with_multiple_roots - assert_raises(RuntimeError) do - @xml << Node.new("foo", @xml) + def test_add_child_with_string + doc = Nokogiri::XML::Document.new + doc.add_child("
quack!
") + assert_equal(1, doc.root.children.length) + assert_equal("quack!", doc.root.children.first.content) end - end - def test_add_child_with_string - doc = Nokogiri::XML::Document.new - doc.add_child "
quack!
" - assert_equal 1, doc.root.children.length - assert_equal "quack!", doc.root.children.first.content - end + def test_prepend + doc = Nokogiri::XML("") - def test_prepend - doc = Nokogiri::XML("") + node_set = doc.root.prepend_child("") + assert_equal(%w[branch], node_set.map(&:name)) - node_set = doc.root.prepend_child "" - assert_equal %w[branch], node_set.map(&:name) + branch = doc.at("//branch") - branch = doc.at("//branch") + leaves = %w[leaf1 leaf2 leaf3] + leaves.each do |name| + branch.prepend_child("<%s/>" % name) + end + assert_equal(leaves.length, branch.children.length) + assert_equal(leaves.reverse, branch.children.map(&:name)) + end - leaves = %w[leaf1 leaf2 leaf3] - leaves.each { |name| - branch.prepend_child("<%s/>" % name) - } - assert_equal leaves.length, branch.children.length - assert_equal leaves.reverse, branch.children.map(&:name) - end + def test_prepend_child_fragment_with_single_node + doc = Nokogiri::XML::Document.new + fragment = doc.fragment("") + doc.prepend_child(fragment) + assert_equal("/hello", doc.at("//hello").path) + assert_equal("hello", doc.root.name) + end - def test_prepend_child_fragment_with_single_node - doc = Nokogiri::XML::Document.new - fragment = doc.fragment("") - doc.prepend_child fragment - assert_equal "/hello", doc.at("//hello").path - assert_equal "hello", doc.root.name - end + def test_prepend_child_fragment_with_multiple_nodes + doc = Nokogiri::XML::Document.new + fragment = doc.fragment("") + assert_raises(RuntimeError) do + doc.prepend_child(fragment) + end + end - def test_prepend_child_fragment_with_multiple_nodes - doc = Nokogiri::XML::Document.new - fragment = doc.fragment("") - assert_raises(RuntimeError) do - doc.prepend_child fragment + def test_prepend_child_with_multiple_roots + assert_raises(RuntimeError) do + xml.prepend_child(Node.new("foo", xml)) + end end - end - def test_prepend_child_with_multiple_roots - assert_raises(RuntimeError) do - @xml.prepend_child Node.new("foo", @xml) + def test_prepend_child_with_string + doc = Nokogiri::XML::Document.new + doc.prepend_child("
quack!
") + assert_equal(1, doc.root.children.length) + assert_equal("quack!", doc.root.children.first.content) end - end - def test_prepend_child_with_string - doc = Nokogiri::XML::Document.new - doc.prepend_child "
quack!
" - assert_equal 1, doc.root.children.length - assert_equal "quack!", doc.root.children.first.content - end + def test_move_root_to_document_with_no_root + sender = Nokogiri::XML("foo") + newdoc = Nokogiri::XML::Document.new + newdoc.root = sender.root + end - def test_move_root_to_document_with_no_root - sender = Nokogiri::XML("foo") - newdoc = Nokogiri::XML::Document.new - newdoc.root = sender.root - end + def test_move_root_with_existing_root_gets_gcd + doc = Nokogiri::XML("test") + doc2 = Nokogiri::XML("#{'x' * 5000000}") + doc2.root = doc.root + end - def test_move_root_with_existing_root_gets_gcd - doc = Nokogiri::XML("test") - doc2 = Nokogiri::XML("#{"x" * 5000000}") - doc2.root = doc.root - end + def test_validate + if Nokogiri.uses_libxml? + assert_equal(45, xml.validate.length) + else + xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| cfg.dtdvalid } + assert_equal(40, xml.validate.length) + end + end - def test_validate - if Nokogiri.uses_libxml? - assert_equal 45, @xml.validate.length - else - xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) { |cfg| cfg.dtdvalid } - assert_equal 40, xml.validate.length + def test_validate_no_internal_subset + doc = Nokogiri::XML("") + assert_nil(doc.validate) end - end - def test_validate_no_internal_subset - doc = Nokogiri::XML("") - assert_nil doc.validate - end + def test_clone + assert(xml.clone) + end - def test_clone - assert @xml.clone - end + def test_document_should_not_have_default_ns + doc = Nokogiri::XML::Document.new - def test_document_should_not_have_default_ns - doc = Nokogiri::XML::Document.new + assert_raises(NoMethodError) do + doc.default_namespace = "http://innernet.com/" + end - assert_raises NoMethodError do - doc.default_namespace = "http://innernet.com/" + assert_raises(NoMethodError) do + doc.add_namespace_definition("foo", "bar") + end end - assert_raises NoMethodError do - doc.add_namespace_definition("foo", "bar") + def test_parse_handles_nil_gracefully + @doc = Nokogiri::XML::Document.parse(nil) + assert_instance_of(Nokogiri::XML::Document, @doc) end - end - def test_parse_handles_nil_gracefully - @doc = Nokogiri::XML::Document.parse(nil) - assert_instance_of Nokogiri::XML::Document, @doc - end + def test_parse_takes_block + options = nil + Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) do |cfg| + options = cfg + end + assert(options) + end - def test_parse_takes_block - options = nil - Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) do |cfg| - options = cfg + def test_parse_yields_parse_options + options = nil + Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) do |cfg| + options = cfg + options.nonet.nowarning.dtdattr + end + assert(options.nonet?) + assert(options.nowarning?) + assert(options.dtdattr?) end - assert options - end - def test_parse_yields_parse_options - options = nil - Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) do |cfg| - options = cfg - options.nonet.nowarning.dtdattr + def test_XML_takes_block + options = nil + Nokogiri::XML(File.read(XML_FILE), XML_FILE) do |cfg| + options = cfg + options.nonet.nowarning.dtdattr + end + assert(options.nonet?) + assert(options.nowarning?) + assert(options.dtdattr?) end - assert options.nonet? - assert options.nowarning? - assert options.dtdattr? - end - def test_XML_takes_block - options = nil - Nokogiri::XML(File.read(XML_FILE), XML_FILE) do |cfg| - options = cfg - options.nonet.nowarning.dtdattr + def test_document_parse_method + xml = Nokogiri::XML::Document.parse(File.read(XML_FILE)) + # lame hack uses root to avoid comparing DOCTYPE tags which can appear out of order. + # I should really finish lorax and use that here. + assert_equal(xml.root.to_s, xml.root.to_s) end - assert options.nonet? - assert options.nowarning? - assert options.dtdattr? - end - def test_subclass - klass = Class.new(Nokogiri::XML::Document) - doc = klass.new - assert_instance_of klass, doc - end + def test_encoding= + xml.encoding = "UTF-8" + assert_match("UTF-8", xml.to_xml) - def test_subclass_initialize - klass = Class.new(Nokogiri::XML::Document) do - attr_accessor :initialized_with + xml.encoding = "EUC-JP" + assert_match("EUC-JP", xml.to_xml) + end - def initialize(*args) - @initialized_with = args + def test_namespace_should_not_exist + assert_raises(NoMethodError) do + xml.namespace end end - doc = klass.new("1.0", 1) - assert_equal ["1.0", 1], doc.initialized_with - end - - def test_subclass_dup - klass = Class.new(Nokogiri::XML::Document) - doc = klass.new.dup - assert_instance_of klass, doc - end - - def test_subclass_parse - klass = Class.new(Nokogiri::XML::Document) - doc = klass.parse(File.read(XML_FILE)) - # lame hack uses root to avoid comparing DOCTYPE tags which can appear out of order. - # I should really finish lorax and use that here. - assert_equal @xml.root.to_s, doc.root.to_s - assert_instance_of klass, doc - end - - def test_document_parse_method - xml = Nokogiri::XML::Document.parse(File.read(XML_FILE)) - # lame hack uses root to avoid comparing DOCTYPE tags which can appear out of order. - # I should really finish lorax and use that here. - assert_equal @xml.root.to_s, xml.root.to_s - end - def test_encoding= - @xml.encoding = "UTF-8" - assert_match "UTF-8", @xml.to_xml + def test_non_existant_function + # WTF. I don't know why this is different between MRI and Jruby + # They should be the same... Either way, raising an exception + # is the correct thing to do. + exception = RuntimeError - @xml.encoding = "EUC-JP" - assert_match "EUC-JP", @xml.to_xml - end + if !Nokogiri.uses_libxml? || (Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO["libxml"]["platform"] == "jruby") + exception = Nokogiri::XML::XPath::SyntaxError + end - def test_namespace_should_not_exist - assert_raises(NoMethodError) { - @xml.namespace - } - end + assert_raises(exception) do + xml.xpath("//name[foo()]") + end + end - def test_non_existant_function - # WTF. I don't know why this is different between MRI and Jruby - # They should be the same... Either way, raising an exception - # is the correct thing to do. - exception = RuntimeError + def test_xpath_syntax_error + assert_raises(Nokogiri::XML::XPath::SyntaxError) do + xml.xpath('\\') + end + end - if !Nokogiri.uses_libxml? || (Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO["libxml"]["platform"] == "jruby") - exception = Nokogiri::XML::XPath::SyntaxError + def test_ancestors + assert_equal(0, xml.ancestors.length) end - assert_raises(exception) { - @xml.xpath("//name[foo()]") - } - end + def test_root_node_parent_is_document + parent = xml.root.parent + assert_equal(xml, parent) + assert_instance_of(Nokogiri::XML::Document, parent) + end - def test_xpath_syntax_error - assert_raises(Nokogiri::XML::XPath::SyntaxError) do - @xml.xpath('\\') + def test_xmlns_is_automatically_registered + doc = Nokogiri::XML(<<~eoxml) + + + bar + + + eoxml + assert_equal(1, doc.css("xmlns|foo").length) + assert_equal(1, doc.css("foo").length) + assert_equal(0, doc.css("|foo").length) + assert_equal(1, doc.xpath("//xmlns:foo").length) + assert_equal(1, doc.search("xmlns|foo").length) + assert_equal(1, doc.search("//xmlns:foo").length) + assert(doc.at("xmlns|foo")) + assert(doc.at("//xmlns:foo")) + assert(doc.at("foo")) end - end - def test_ancestors - assert_equal 0, @xml.ancestors.length - end + def test_xmlns_is_registered_for_nodesets + doc = Nokogiri::XML(<<~eoxml) + + + + baz + + + + eoxml + assert_equal(1, doc.css("xmlns|foo").css("xmlns|bar").length) + assert_equal(1, doc.css("foo").css("bar").length) + assert_equal(1, doc.xpath("//xmlns:foo").xpath("./xmlns:bar").length) + assert_equal(1, doc.search("xmlns|foo").search("xmlns|bar").length) + assert_equal(1, doc.search("//xmlns:foo").search("./xmlns:bar").length) + end - def test_root_node_parent_is_document - parent = @xml.root.parent - assert_equal @xml, parent - assert_instance_of Nokogiri::XML::Document, parent - end + def test_to_xml_with_indent + doc = Nokogiri::XML("") + doc = Nokogiri::XML(doc.to_xml(indent: 5)) - def test_xmlns_is_automatically_registered - doc = Nokogiri::XML(<<-eoxml) - - - bar - - - eoxml - assert_equal 1, doc.css("xmlns|foo").length - assert_equal 1, doc.css("foo").length - assert_equal 0, doc.css("|foo").length - assert_equal 1, doc.xpath("//xmlns:foo").length - assert_equal 1, doc.search("xmlns|foo").length - assert_equal 1, doc.search("//xmlns:foo").length - assert doc.at("xmlns|foo") - assert doc.at("//xmlns:foo") - assert doc.at("foo") - end + assert_indent(5, doc) + end - def test_xmlns_is_registered_for_nodesets - doc = Nokogiri::XML(<<-eoxml) - - - - baz - - - - eoxml - assert_equal 1, doc.css("xmlns|foo").css("xmlns|bar").length - assert_equal 1, doc.css("foo").css("bar").length - assert_equal 1, doc.xpath("//xmlns:foo").xpath("./xmlns:bar").length - assert_equal 1, doc.search("xmlns|foo").search("xmlns|bar").length - assert_equal 1, doc.search("//xmlns:foo").search("./xmlns:bar").length - end + def test_write_xml_to_with_indent + io = StringIO.new + doc = Nokogiri::XML("") + doc.write_xml_to(io, indent: 5) + io.rewind + doc = Nokogiri::XML(io.read) + assert_indent(5, doc) + end - def test_to_xml_with_indent - doc = Nokogiri::XML("") - doc = Nokogiri::XML(doc.to_xml(:indent => 5)) + unless Nokogiri.uses_libxml?("~> 2.6.0") + def test_encoding + xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, "UTF-8") + assert_equal("UTF-8", xml.encoding) + end + end - assert_indent 5, doc - end + def test_memory_explosion_on_invalid_xml + doc = Nokogiri::XML("<<<") + refute_nil(doc) + refute_empty(doc.errors) + end - def test_write_xml_to_with_indent - io = StringIO.new - doc = Nokogiri::XML("") - doc.write_xml_to io, :indent => 5 - io.rewind - doc = Nokogiri::XML(io.read) - assert_indent 5, doc - end + def test_memory_explosion_on_wrong_formatted_element_following_the_root_element + doc = Nokogiri::XML("<\n") + refute_nil(doc) + refute_empty(doc.errors) + end - if ! Nokogiri.uses_libxml?("~> 2.6.0") - def test_encoding - xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, "UTF-8") - assert_equal "UTF-8", xml.encoding + def test_document_has_errors + doc = Nokogiri::XML(<<~eoxml) + + eoxml + assert(doc.errors.length > 0) + doc.errors.each do |error| + assert_match(error.message, error.inspect) + assert_match(error.message, error.to_s) + end end - end - def test_memory_explosion_on_invalid_xml - doc = Nokogiri::XML("<<<") - refute_nil doc - refute_empty doc.errors - end + def test_strict_document_throws_syntax_error + assert_raises(Nokogiri::XML::SyntaxError) do + Nokogiri::XML("", nil, nil, 0) + end - def test_memory_explosion_on_wrong_formatted_element_following_the_root_element - doc = Nokogiri::XML("<\n") - refute_nil doc - refute_empty doc.errors - end + assert_raises(Nokogiri::XML::SyntaxError) do + Nokogiri::XML("") do |cfg| + cfg.strict + end + end - def test_document_has_errors - doc = Nokogiri::XML(<<-eoxml) - - eoxml - assert doc.errors.length > 0 - doc.errors.each do |error| - assert_match error.message, error.inspect - assert_match error.message, error.to_s + assert_raises(Nokogiri::XML::SyntaxError) do + Nokogiri::XML(StringIO.new("")) do |cfg| + cfg.strict + end + end end - end - def test_strict_document_throws_syntax_error - assert_raises(Nokogiri::XML::SyntaxError) { - Nokogiri::XML("", nil, nil, 0) - } - - assert_raises(Nokogiri::XML::SyntaxError) { - Nokogiri::XML("") { |cfg| - cfg.strict - } - } - - assert_raises(Nokogiri::XML::SyntaxError) { - Nokogiri::XML(StringIO.new("")) { |cfg| - cfg.strict - } - } - end + def test_XML_function + xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE) + assert(xml.xml?) + end - def test_XML_function - xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE) - assert xml.xml? - end + def test_url + assert(xml.url) + assert_equal(XML_FILE, xml.url) + end - def test_url - assert @xml.url - assert_equal XML_FILE, @xml.url - end + def test_document_parent + xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE) + assert_raises(NoMethodError) do + xml.parent + end + end - def test_document_parent - xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE) - assert_raises(NoMethodError) { - xml.parent - } - end + def test_document_name + xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE) + assert_equal("document", xml.name) + end - def test_document_name - xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE) - assert_equal "document", xml.name - end + def test_parse_can_take_io + xml = nil + File.open(XML_FILE, "rb") do |f| + xml = Nokogiri::XML(f) + end + assert(xml.xml?) + assert_equal(XML_FILE, xml.url) + set = xml.search("//employee") + assert(set.length > 0) + end - def test_parse_can_take_io - xml = nil - File.open(XML_FILE, "rb") { |f| - xml = Nokogiri::XML(f) - } - assert xml.xml? - assert_equal XML_FILE, xml.url - set = xml.search("//employee") - assert set.length > 0 - end + def test_parsing_empty_io + doc = Nokogiri::XML.parse(StringIO.new("")) + refute_nil(doc) + end - def test_parsing_empty_io - doc = Nokogiri::XML.parse(StringIO.new("")) - refute_nil doc - end + def test_parse_works_with_an_object_that_responds_to_read + klass = Class.new do + def initialize + @contents = StringIO.new("
foo
") + end - def test_parse_works_with_an_object_that_responds_to_read - klass = Class.new do - def initialize - @contents = StringIO.new("
foo
") + def read(*args) + @contents.read(*args) + end end - def read(*args) - @contents.read(*args) - end + doc = Nokogiri::XML.parse(klass.new) + assert_equal("foo", doc.at_css("div").content) end - doc = Nokogiri::XML.parse klass.new - assert_equal "foo", doc.at_css("div").content - end - - def test_parse_works_with_an_object_that_responds_to_path - xml = String.new("hello") - def xml.path - "/i/should/be/the/document/url" - end + def test_parse_works_with_an_object_that_responds_to_path + xml = String.new("hello") + def xml.path + "/i/should/be/the/document/url" + end - doc = Nokogiri::XML.parse(xml) + doc = Nokogiri::XML.parse(xml) - assert_equal "/i/should/be/the/document/url", doc.url - end + assert_equal("/i/should/be/the/document/url", doc.url) + end - # issue #1821, #2110 - def test_parse_can_take_pathnames - assert(File.size(XML_ATOM_FILE) > 4096) # file must be big enough to trip the read callback more than once + # issue #1821, #2110 + def test_parse_can_take_pathnames + assert(File.size(XML_ATOM_FILE) > 4096) # file must be big enough to trip the read callback more than once - doc = Nokogiri::XML.parse(Pathname.new(XML_ATOM_FILE)) + doc = Nokogiri::XML.parse(Pathname.new(XML_ATOM_FILE)) - # an arbitrary assertion on the structure of the document - assert_equal 20, doc.xpath("/xmlns:feed/xmlns:entry/xmlns:author", - "xmlns" => "http://www.w3.org/2005/Atom").length - assert_equal XML_ATOM_FILE, doc.url - end + # an arbitrary assertion on the structure of the document + assert_equal(20, doc.xpath("/xmlns:feed/xmlns:entry/xmlns:author", + "xmlns" => "http://www.w3.org/2005/Atom").length) + assert_equal(XML_ATOM_FILE, doc.url) + end - def test_search_on_empty_documents - doc = Nokogiri::XML::Document.new - ns = doc.search("//foo") - assert_equal 0, ns.length + def test_search_on_empty_documents + doc = Nokogiri::XML::Document.new + ns = doc.search("//foo") + assert_equal(0, ns.length) - ns = doc.css("foo") - assert_equal 0, ns.length + ns = doc.css("foo") + assert_equal(0, ns.length) - ns = doc.xpath("//foo") - assert_equal 0, ns.length - end + ns = doc.xpath("//foo") + assert_equal(0, ns.length) + end - def test_document_search_with_multiple_queries - xml = ' + def test_document_search_with_multiple_queries + xml = '
important thing
@@ -767,305 +737,360 @@ def test_document_search_with_multiple_queries

more stuff

' - document = Nokogiri::XML(xml) - assert_kind_of Nokogiri::XML::Document, document + document = Nokogiri::XML(xml) + assert_kind_of(Nokogiri::XML::Document, document) - assert_equal 3, document.xpath(".//div", ".//p").length - assert_equal 3, document.css(".title", ".content", "p").length - assert_equal 3, document.search(".//div", "p.blah").length - end + assert_equal(3, document.xpath(".//div", ".//p").length) + assert_equal(3, document.css(".title", ".content", "p").length) + assert_equal(3, document.search(".//div", "p.blah").length) + end - def test_bad_xpath_raises_syntax_error - assert_raises(XML::XPath::SyntaxError) { - @xml.xpath('\\') - } - end + def test_bad_xpath_raises_syntax_error + assert_raises(XML::XPath::SyntaxError) do + xml.xpath('\\') + end + end - def test_find_with_namespace - doc = Nokogiri::XML.parse(<<-eoxml) - - snuggles! - - eoxml - - ctx = Nokogiri::XML::XPathContext.new(doc) - ctx.register_ns "tenderlove", "http://tenderlovemaking.com/" - set = ctx.evaluate("//tenderlove:foo") - assert_equal 1, set.length - assert_equal "foo", set.first.name - - # It looks like only the URI is important: - ctx = Nokogiri::XML::XPathContext.new(doc) - ctx.register_ns "america", "http://tenderlovemaking.com/" - set = ctx.evaluate("//america:foo") - assert_equal 1, set.length - assert_equal "foo", set.first.name - - # Its so important that a missing slash will cause it to return nothing - ctx = Nokogiri::XML::XPathContext.new(doc) - ctx.register_ns "america", "http://tenderlovemaking.com" - set = ctx.evaluate("//america:foo") - assert_equal 0, set.length - end + def test_find_with_namespace + doc = Nokogiri::XML.parse(<<~eoxml) + + snuggles! + + eoxml + + ctx = Nokogiri::XML::XPathContext.new(doc) + ctx.register_ns("tenderlove", "http://tenderlovemaking.com/") + set = ctx.evaluate("//tenderlove:foo") + assert_equal(1, set.length) + assert_equal("foo", set.first.name) + + # It looks like only the URI is important: + ctx = Nokogiri::XML::XPathContext.new(doc) + ctx.register_ns("america", "http://tenderlovemaking.com/") + set = ctx.evaluate("//america:foo") + assert_equal(1, set.length) + assert_equal("foo", set.first.name) + + # Its so important that a missing slash will cause it to return nothing + ctx = Nokogiri::XML::XPathContext.new(doc) + ctx.register_ns("america", "http://tenderlovemaking.com") + set = ctx.evaluate("//america:foo") + assert_equal(0, set.length) + end - def test_xml? - assert @xml.xml? - end + def test_xml? + assert(xml.xml?) + end - def test_document - assert @xml.document - end + def test_document + assert(xml.document) + end - def test_singleton_methods - assert node_set = @xml.search("//name") - assert node_set.length > 0 - node = node_set.first - def node.test - "test" + def test_singleton_methods + assert(node_set = xml.search("//name")) + assert(node_set.length > 0) + node = node_set.first + def node.test + "test" + end + assert(node_set = xml.search("//name")) + assert_equal("test", node_set.first.test) end - assert node_set = @xml.search("//name") - assert_equal "test", node_set.first.test - end - def test_multiple_search - assert node_set = @xml.search("//employee", "//name") - employees = @xml.search("//employee") - names = @xml.search("//name") - assert_equal(employees.length + names.length, node_set.length) - end + def test_multiple_search + assert(node_set = xml.search("//employee", "//name")) + employees = xml.search("//employee") + names = xml.search("//name") + assert_equal(employees.length + names.length, node_set.length) + end - def test_node_set_index - assert node_set = @xml.search("//employee") + def test_node_set_index + assert(node_set = xml.search("//employee")) - assert_equal(5, node_set.length) - assert node_set[4] - assert_nil node_set[5] - end + assert_equal(5, node_set.length) + assert(node_set[4]) + assert_nil(node_set[5]) + end - def test_search - assert node_set = @xml.search("//employee") + def test_search + assert(node_set = xml.search("//employee")) - assert_equal(5, node_set.length) + assert_equal(5, node_set.length) - node_set.each do |node| - assert_equal("employee", node.name) + node_set.each do |node| + assert_equal("employee", node.name) + end end - end - - def test_dump - assert @xml.serialize - assert @xml.to_xml - end - def test_dup - dup = @xml.dup - assert_instance_of Nokogiri::XML::Document, dup - assert dup.xml?, "duplicate should be xml" - end + def test_dump + assert(xml.serialize) + assert(xml.to_xml) + end - def test_new - doc = nil - doc = Nokogiri::XML::Document.new - assert doc - assert doc.xml? - assert_nil doc.root - end + def test_dup + dup = xml.dup + assert_instance_of(Nokogiri::XML::Document, dup) + assert(dup.xml?, "duplicate should be xml") + end - def test_set_root - doc = nil - doc = Nokogiri::XML::Document.new - assert doc - assert doc.xml? - assert_nil doc.root - node = Nokogiri::XML::Node.new("b", doc) { |n| - n.content = "hello world" - } - assert_equal("hello world", node.content) - doc.root = node - assert_equal(node, doc.root) - end + def test_new + doc = nil + doc = Nokogiri::XML::Document.new + assert(doc) + assert(doc.xml?) + assert_nil(doc.root) + end - def test_remove_namespaces - doc = Nokogiri::XML <<-EOX - - hello from a - hello from b - - hello from c - - - EOX - - namespaces = doc.root.namespaces - - # assert on setup - assert_equal 2, doc.root.namespaces.length - assert_equal 3, doc.at_xpath("//container").namespaces.length - assert_equal 0, doc.xpath("//foo").length - assert_equal 1, doc.xpath("//a:foo").length - assert_equal 1, doc.xpath("//a:foo").length - assert_equal 1, doc.xpath("//x:foo", "x" => "http://c.flavorjon.es/").length - assert_match %r{foo c:attr}, doc.to_xml - doc.at_xpath("//x:foo", "x" => "http://c.flavorjon.es/").tap do |node| - assert_nil node["attr"] - assert_equal "attr-value", node["c:attr"] - assert_nil node.attribute_with_ns("attr", nil) - assert_equal "attr-value", node.attribute_with_ns("attr", "http://c.flavorjon.es/").value - assert_equal "attr-value", node.attributes["attr"].value - end - - doc.remove_namespaces! - - assert_equal 0, doc.root.namespaces.length - assert_equal 0, doc.at_xpath("//container").namespaces.length - assert_equal 3, doc.xpath("//foo").length - assert_equal 0, doc.xpath("//a:foo", namespaces).length - assert_equal 0, doc.xpath("//a:foo", namespaces).length - assert_equal 0, doc.xpath("//x:foo", "x" => "http://c.flavorjon.es/").length - assert_match %r{foo attr}, doc.to_xml - doc.at_xpath("//container/foo").tap do |node| - assert_equal "attr-value", node["attr"] - assert_nil node["c:attr"] - assert_equal "attr-value", node.attribute_with_ns("attr", nil).value - assert_nil node.attribute_with_ns("attr", "http://c.flavorjon.es/") - assert_equal "attr-value", node.attributes["attr"].value # doesn't change! + def test_set_root + doc = nil + doc = Nokogiri::XML::Document.new + assert(doc) + assert(doc.xml?) + assert_nil(doc.root) + node = Nokogiri::XML::Node.new("b", doc) do |n| + n.content = "hello world" + end + assert_equal("hello world", node.content) + doc.root = node + assert_equal(node, doc.root) end - end - # issue #785 - def test_attribute_decoration - decorator = Module.new do - def test_method + def test_remove_namespaces + doc = Nokogiri::XML(<<~EOX) + + hello from a + hello from b + + hello from c + + + EOX + + namespaces = doc.root.namespaces + + # assert on setup + assert_equal(2, doc.root.namespaces.length) + assert_equal(3, doc.at_xpath("//container").namespaces.length) + assert_equal(0, doc.xpath("//foo").length) + assert_equal(1, doc.xpath("//a:foo").length) + assert_equal(1, doc.xpath("//a:foo").length) + assert_equal(1, doc.xpath("//x:foo", "x" => "http://c.flavorjon.es/").length) + assert_match(/foo c:attr/, doc.to_xml) + doc.at_xpath("//x:foo", "x" => "http://c.flavorjon.es/").tap do |node| + assert_nil(node["attr"]) + assert_equal("attr-value", node["c:attr"]) + assert_nil(node.attribute_with_ns("attr", nil)) + assert_equal("attr-value", node.attribute_with_ns("attr", "http://c.flavorjon.es/").value) + assert_equal("attr-value", node.attributes["attr"].value) + end + + doc.remove_namespaces! + + assert_equal(0, doc.root.namespaces.length) + assert_equal(0, doc.at_xpath("//container").namespaces.length) + assert_equal(3, doc.xpath("//foo").length) + assert_equal(0, doc.xpath("//a:foo", namespaces).length) + assert_equal(0, doc.xpath("//a:foo", namespaces).length) + assert_equal(0, doc.xpath("//x:foo", "x" => "http://c.flavorjon.es/").length) + assert_match(/foo attr/, doc.to_xml) + doc.at_xpath("//container/foo").tap do |node| + assert_equal("attr-value", node["attr"]) + assert_nil(node["c:attr"]) + assert_equal("attr-value", node.attribute_with_ns("attr", nil).value) + assert_nil(node.attribute_with_ns("attr", "http://c.flavorjon.es/")) + assert_equal("attr-value", node.attributes["attr"].value) # doesn't change! end end - util_decorate(@xml, decorator) + # issue #785 + def test_attribute_decoration + decorator = Module.new do + def test_method + end + end - assert @xml.search("//@street").first.respond_to?(:test_method) - end + util_decorate(xml, decorator) - def test_subset_is_decorated - x = Module.new do - def awesome! + assert(xml.search("//@street").first.respond_to?(:test_method)) + end + + def test_subset_is_decorated + x = Module.new do + def awesome! + end end + util_decorate(xml, x) + + assert(xml.respond_to?(:awesome!)) + assert(node_set = xml.search("//staff")) + assert(node_set.respond_to?(:awesome!)) + assert(subset = node_set.search(".//employee")) + assert(subset.respond_to?(:awesome!)) + assert(sub_subset = node_set.search(".//name")) + assert(sub_subset.respond_to?(:awesome!)) end - util_decorate(@xml, x) - assert @xml.respond_to?(:awesome!) - assert node_set = @xml.search("//staff") - assert node_set.respond_to?(:awesome!) - assert subset = node_set.search(".//employee") - assert subset.respond_to?(:awesome!) - assert sub_subset = node_set.search(".//name") - assert sub_subset.respond_to?(:awesome!) - end + def test_decorator_is_applied + x = Module.new do + def awesome! + end + end + util_decorate(xml, x) - def test_decorator_is_applied - x = Module.new do - def awesome! + assert(xml.respond_to?(:awesome!)) + assert(node_set = xml.search("//employee")) + assert(node_set.respond_to?(:awesome!)) + node_set.each do |node| + assert(node.respond_to?(:awesome!), node.class) end + assert(xml.root.respond_to?(:awesome!)) + assert(xml.children.respond_to?(:awesome!)) end - util_decorate(@xml, x) - assert @xml.respond_to?(:awesome!) - assert node_set = @xml.search("//employee") - assert node_set.respond_to?(:awesome!) - node_set.each do |node| - assert node.respond_to?(:awesome!), node.class + if Nokogiri.jruby? + def wrap_java_document + require "java" + factory = javax.xml.parsers.DocumentBuilderFactory.newInstance + builder = factory.newDocumentBuilder + document = builder.newDocument + root = document.createElement("foo") + document.appendChild(root) + Nokogiri::XML::Document.wrap(document) + end end - assert @xml.root.respond_to?(:awesome!) - assert @xml.children.respond_to?(:awesome!) - end - if Nokogiri.jruby? - def wrap_java_document - require "java" - factory = javax.xml.parsers.DocumentBuilderFactory.newInstance - builder = factory.newDocumentBuilder - document = builder.newDocument - root = document.createElement("foo") - document.appendChild(root) - Nokogiri::XML::Document.wrap(document) + def test_java_integration + skip("CRuby doesn't have the Document#wrap method") unless Nokogiri.jruby? + noko_doc = wrap_java_document + assert_equal("foo", noko_doc.root.name) + + noko_doc = Nokogiri::XML(<<~eoxml) + + + + eoxml + dom = noko_doc.to_java + assert(dom.is_a?(org.w3c.dom.Document)) + assert_equal("foo", dom.getDocumentElement.getTagName) end - end - def test_java_integration - skip("CRuby doesn't have the Document#wrap method") unless Nokogiri.jruby? - noko_doc = wrap_java_document - assert_equal "foo", noko_doc.root.name - - noko_doc = Nokogiri::XML(< - - -eoxml - dom = noko_doc.to_java - assert dom.kind_of? org.w3c.dom.Document - assert_equal "foo", dom.getDocumentElement().getTagName() - end + def test_add_child + skip("CRuby doesn't have the Document#wrap method") unless Nokogiri.jruby? + doc = wrap_java_document + doc.root.add_child("") + end - def test_add_child - skip("CRuby doesn't have the Document#wrap method") unless Nokogiri.jruby? - doc = wrap_java_document - doc.root.add_child "" - end + def test_can_be_closed + f = File.open(XML_FILE) + Nokogiri::XML(f) + f.close + end - def test_can_be_closed - f = File.open XML_FILE - Nokogiri::XML f - f.close - end + describe "XML::Document.parse" do + # establish baseline behavior for HTML document behavior in + # https://github.com/sparklemotion/nokogiri/issues/2130 + # (see similar tests in test/html/test_document.rb) + let(:xml_strict) do + Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_XML).norecover + end - describe "XML::Document.parse" do - # establish baseline behavior for HTML document behavior in - # https://github.com/sparklemotion/nokogiri/issues/2130 - # (see similar tests in test/html/test_document.rb) - let(:xml_strict) do - Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_XML).norecover - end + it "sets the test up correctly" do + assert(xml_strict.strict?) + end - it "sets the test up correctly" do - assert(xml_strict.strict?) - end + describe "read memory" do + let(:input) { ", 0) end end end - describe "default options" do - it "does not raise exception on parse error" do - doc = Nokogiri::XML.parse(input) - assert_operator(doc.errors.length, :>, 0) + describe "read io" do + let(:input) { StringIO.new(", 0) + end end end end - describe "read io" do - let(:input) { StringIO.new(", 0) + describe ".new" do + it "returns an instance of the expected class" do + doc = klass.new + assert_instance_of(klass, doc) + end + + it "calls #initialize exactly once" do + doc = klass.new + assert_equal(1, doc.initialized_count) + end + + it "passes arguments to #initialize" do + doc = klass.new("1.0", 1) + assert_equal ["1.0", 1], doc.initialized_with + end + end + + it "#dup returns the expected class" do + doc = klass.new.dup + assert_instance_of(klass, doc) + end + + describe ".parse" do + it "returns an instance of the expected class" do + doc = klass.parse(File.read(XML_FILE)) + assert_instance_of(klass, doc) + end + + it "calls #initialize exactly once" do + doc = klass.parse(File.read(XML_FILE)) + assert_equal(1, doc.initialized_count) + end + + it "parses the doc" do + doc = klass.parse(File.read(XML_FILE)) + assert_equal xml.root.to_s, doc.root.to_s end end end diff --git a/test/xml/test_document_fragment.rb b/test/xml/test_document_fragment.rb index a3cfebdbdf..0c6468eda1 100644 --- a/test/xml/test_document_fragment.rb +++ b/test/xml/test_document_fragment.rb @@ -1,176 +1,163 @@ +# frozen_string_literal: true require "helper" module Nokogiri module XML class TestDocumentFragment < Nokogiri::TestCase - def setup - super - @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) - end - - def test_replace_text_node - html = "foo" - doc = Nokogiri::XML::DocumentFragment.parse(html) - doc.children[0].replace "bar" - assert_equal 'bar', doc.children[0].content - end - - def test_fragment_is_relative - doc = Nokogiri::XML('') - ctx = doc.root.child - fragment = Nokogiri::XML::DocumentFragment.new(doc, '', ctx) - hello = fragment.child - - assert_equal 'hello', hello.name - assert_equal doc.root.child.namespace, hello.namespace - end - - def test_node_fragment_is_relative - doc = Nokogiri::XML('') - assert doc.root.child - fragment = doc.root.child.fragment('') - hello = fragment.child + describe Nokogiri::XML::DocumentFragment do + let(:xml) { Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) } + + def test_replace_text_node + html = "foo" + doc = Nokogiri::XML::DocumentFragment.parse(html) + doc.children[0].replace("bar") + assert_equal('bar', doc.children[0].content) + end - assert_equal 'hello', hello.name - assert_equal doc.root.child.namespace, hello.namespace - end + def test_fragment_is_relative + doc = Nokogiri::XML('') + ctx = doc.root.child + fragment = Nokogiri::XML::DocumentFragment.new(doc, '', ctx) + hello = fragment.child - def test_new - assert Nokogiri::XML::DocumentFragment.new(@xml) - end + assert_equal('hello', hello.name) + assert_equal(doc.root.child.namespace, hello.namespace) + end - def test_fragment_should_have_document - fragment = Nokogiri::XML::DocumentFragment.new(@xml) - assert_equal @xml, fragment.document - end + def test_node_fragment_is_relative + doc = Nokogiri::XML('') + assert(doc.root.child) + fragment = doc.root.child.fragment('') + hello = fragment.child - def test_name - fragment = Nokogiri::XML::DocumentFragment.new(@xml) - assert_equal '#document-fragment', fragment.name - end + assert_equal('hello', hello.name) + assert_equal(doc.root.child.namespace, hello.namespace) + end - def test_static_method - fragment = Nokogiri::XML::DocumentFragment.parse("
a
") - assert_instance_of Nokogiri::XML::DocumentFragment, fragment - end + def test_new + assert(Nokogiri::XML::DocumentFragment.new(xml)) + end - def test_static_method_with_namespaces - # follows different path in FragmentHandler#start_element which blew up after 597195ff - fragment = Nokogiri::XML::DocumentFragment.parse("a") - assert_instance_of Nokogiri::XML::DocumentFragment, fragment - end + def test_fragment_should_have_document + fragment = Nokogiri::XML::DocumentFragment.new(xml) + assert_equal(xml, fragment.document) + end - def test_many_fragments - 100.times { Nokogiri::XML::DocumentFragment.new(@xml) } - end + def test_name + fragment = Nokogiri::XML::DocumentFragment.new(xml) + assert_equal('#document-fragment', fragment.name) + end - def test_subclass - klass = Class.new(Nokogiri::XML::DocumentFragment) - fragment = klass.new(@xml, "
a
") - assert_instance_of klass, fragment - end + def test_static_method + fragment = Nokogiri::XML::DocumentFragment.parse("
a
") + assert_instance_of(Nokogiri::XML::DocumentFragment, fragment) + end - def test_subclass_parse - klass = Class.new(Nokogiri::XML::DocumentFragment) - doc = klass.parse("
a
") - assert_instance_of klass, doc - end + def test_static_method_with_namespaces + # follows different path in FragmentHandler#start_element which blew up after 597195ff + fragment = Nokogiri::XML::DocumentFragment.parse("a") + assert_instance_of(Nokogiri::XML::DocumentFragment, fragment) + end - def test_unparented_text_node_parse - fragment = Nokogiri::XML::DocumentFragment.parse("foo") - fragment.children.after("") - end + def test_many_fragments + 100.times { Nokogiri::XML::DocumentFragment.new(xml) } + end - def test_xml_fragment - fragment = Nokogiri::XML.fragment("
a
") - assert_equal "
a
", fragment.to_s - end + def test_unparented_text_node_parse + fragment = Nokogiri::XML::DocumentFragment.parse("foo") + fragment.children.after("") + end - def test_xml_fragment_has_multiple_toplevel_children - doc = "
b
e
" - fragment = Nokogiri::XML::Document.new.fragment(doc) - assert_equal "
b
e
", fragment.to_s - end + def test_xml_fragment + fragment = Nokogiri::XML.fragment("
a
") + assert_equal("
a
", fragment.to_s) + end - def test_xml_fragment_has_outer_text - # this test is descriptive, not prescriptive. - doc = "a
b
" - fragment = Nokogiri::XML::Document.new.fragment(doc) - assert_equal "a
b
", fragment.to_s + def test_xml_fragment_has_multiple_toplevel_children + doc = "
b
e
" + fragment = Nokogiri::XML::Document.new.fragment(doc) + assert_equal("
b
e
", fragment.to_s) + end - doc = "
b
c" - fragment = Nokogiri::XML::Document.new.fragment(doc) - assert_equal "
b
c", fragment.to_s - end + def test_xml_fragment_has_outer_text + # this test is descriptive, not prescriptive. + doc = "a
b
" + fragment = Nokogiri::XML::Document.new.fragment(doc) + assert_equal("a
b
", fragment.to_s) - def test_xml_fragment_case_sensitivity - doc = "b" - fragment = Nokogiri::XML::Document.new.fragment(doc) - assert_equal "b", fragment.to_s - end + doc = "
b
c" + fragment = Nokogiri::XML::Document.new.fragment(doc) + assert_equal("
b
c", fragment.to_s) + end - def test_xml_fragment_with_leading_whitespace - doc = "
b
" - fragment = Nokogiri::XML::Document.new.fragment(doc) - assert_equal "
b
", fragment.to_s - end + def test_xml_fragment_case_sensitivity + doc = "b" + fragment = Nokogiri::XML::Document.new.fragment(doc) + assert_equal("b", fragment.to_s) + end - def test_xml_fragment_with_leading_whitespace_and_newline - doc = " \n
b
" - fragment = Nokogiri::XML::Document.new.fragment(doc) - assert_equal " \n
b
", fragment.to_s - end + def test_xml_fragment_with_leading_whitespace + doc = "
b
" + fragment = Nokogiri::XML::Document.new.fragment(doc) + assert_equal("
b
", fragment.to_s) + end - def test_fragment_children_search - fragment = Nokogiri::XML::Document.new.fragment( - '

hi

' - ) - expected = fragment.children.xpath('.//p') - assert_equal 1, expected.length - - css = fragment.children.css('p') - search_css = fragment.children.search('p') - search_xpath = fragment.children.search('.//p') - assert_equal expected, css - assert_equal expected, search_css - assert_equal expected, search_xpath - end + def test_xml_fragment_with_leading_whitespace_and_newline + doc = " \n
b
" + fragment = Nokogiri::XML::Document.new.fragment(doc) + assert_equal(" \n
b
", fragment.to_s) + end - def test_fragment_css_search_with_whitespace_and_node_removal - # The same xml without leading whitespace in front of the first line - # does not expose the error. Putting both nodes on the same line - # instead also fixes the crash. - fragment = Nokogiri::XML::DocumentFragment.parse <<-EOXML -

hi

x

another paragraph

- EOXML - children = fragment.css('p') - assert_equal 2, children.length - # removing the last node instead does not yield the error. Probably the - # node removal leaves around two consecutive text nodes which make the - # css search crash? - children.first.remove - assert_equal 1, fragment.xpath('.//p | self::p').length - assert_equal 1, fragment.css('p').length - end + def test_fragment_children_search + fragment = Nokogiri::XML::Document.new.fragment( + '

hi

' + ) + expected = fragment.children.xpath('.//p') + assert_equal(1, expected.length) + + css = fragment.children.css('p') + search_css = fragment.children.search('p') + search_xpath = fragment.children.search('.//p') + assert_equal(expected, css) + assert_equal(expected, search_css) + assert_equal(expected, search_xpath) + end - def test_fragment_search_three_ways - frag = Nokogiri::XML::Document.new.fragment '

foo

bar

' - expected = frag.xpath('./*[@id = "content"]') - assert_equal 2, expected.length + def test_fragment_css_search_with_whitespace_and_node_removal + # The same xml without leading whitespace in front of the first line + # does not expose the error. Putting both nodes on the same line + # instead also fixes the crash. + fragment = Nokogiri::XML::DocumentFragment.parse(<<~EOXML) +

hi

x

another paragraph

+ EOXML + children = fragment.css('p') + assert_equal(2, children.length) + # removing the last node instead does not yield the error. Probably the + # node removal leaves around two consecutive text nodes which make the + # css search crash? + children.first.remove + assert_equal(1, fragment.xpath('.//p | self::p').length) + assert_equal(1, fragment.css('p').length) + end - [ - [:css, '#content'], - [:search, '#content'], - [:search, './*[@id = \'content\']'], - ].each do |method, query| - result = frag.send(method, query) - assert_equal(expected, result, - "fragment search with :#{method} using '#{query}' expected '#{expected}' got '#{result}'") + def test_fragment_search_three_ways + frag = Nokogiri::XML::Document.new.fragment('

foo

bar

') + expected = frag.xpath('./*[@id = "content"]') + assert_equal(2, expected.length) + + [ + [:css, '#content'], + [:search, '#content'], + [:search, './*[@id = \'content\']'], + ].each do |method, query| + result = frag.send(method, query) + assert_equal(expected, result, + "fragment search with :#{method} using '#{query}' expected '#{expected}' got '#{result}'") + end end - end - def test_fragment_search_with_multiple_queries - xml = ' + def test_fragment_search_with_multiple_queries + xml = '
important thing
@@ -179,148 +166,202 @@ def test_fragment_search_with_multiple_queries

more stuff

' - fragment = Nokogiri::XML.fragment(xml) - assert_kind_of Nokogiri::XML::DocumentFragment, fragment + fragment = Nokogiri::XML.fragment(xml) + assert_kind_of(Nokogiri::XML::DocumentFragment, fragment) - assert_equal 3, fragment.xpath('.//div', './/p').length - assert_equal 3, fragment.css('.title', '.content', 'p').length - assert_equal 3, fragment.search('.//div', 'p.blah').length - end + assert_equal(3, fragment.xpath('.//div', './/p').length) + assert_equal(3, fragment.css('.title', '.content', 'p').length) + assert_equal(3, fragment.search('.//div', 'p.blah').length) + end - def test_fragment_without_a_namespace_does_not_get_a_namespace - doc = Nokogiri::XML <<-EOX - - - - EOX - frag = doc.fragment "" - assert_nil frag.namespace - end + def test_fragment_without_a_namespace_does_not_get_a_namespace + doc = Nokogiri::XML(<<~EOX) + + + + EOX + frag = doc.fragment("") + assert_nil(frag.namespace) + end - def test_fragment_namespace_resolves_against_document_root - doc = Nokogiri::XML <<-EOX - - - - EOX - ns = doc.root.namespace_definitions.detect { |x| x.prefix == "bar" } - - frag = doc.fragment "" - assert frag.children.first.namespace - assert_equal ns, frag.children.first.namespace - end + def test_fragment_namespace_resolves_against_document_root + doc = Nokogiri::XML(<<~EOX) + + + + EOX + ns = doc.root.namespace_definitions.detect { |x| x.prefix == "bar" } + + frag = doc.fragment("") + assert(frag.children.first.namespace) + assert_equal(ns, frag.children.first.namespace) + end - def test_fragment_invalid_namespace_is_silently_ignored - doc = Nokogiri::XML <<-EOX - - - - EOX - frag = doc.fragment "" - assert_nil frag.children.first.namespace - end + def test_fragment_invalid_namespace_is_silently_ignored + doc = Nokogiri::XML(<<~EOX) + + + + EOX + frag = doc.fragment("") + assert_nil(frag.children.first.namespace) + end - def test_decorator_is_applied - x = Module.new do - def awesome! + def test_decorator_is_applied + x = Module.new do + def awesome! + end end - end - util_decorate(@xml, x) - fragment = Nokogiri::XML::DocumentFragment.new(@xml, "
a
b
") + util_decorate(xml, x) + fragment = Nokogiri::XML::DocumentFragment.new(xml, "
a
b
") - assert node_set = fragment.css('div') - assert node_set.respond_to?(:awesome!) - node_set.each do |node| - assert node.respond_to?(:awesome!), node.class + assert(node_set = fragment.css('div')) + assert(node_set.respond_to?(:awesome!)) + node_set.each do |node| + assert(node.respond_to?(:awesome!), node.class) + end + assert(fragment.children.respond_to?(:awesome!), fragment.children.class) end - assert fragment.children.respond_to?(:awesome!), fragment.children.class - end - def test_decorator_is_applied_to_empty_set - x = Module.new do - def awesome! + def test_decorator_is_applied_to_empty_set + x = Module.new do + def awesome! + end end + util_decorate(xml, x) + fragment = Nokogiri::XML::DocumentFragment.new(xml, "") + assert(fragment.children.respond_to?(:awesome!), fragment.children.class) end - util_decorate(@xml, x) - fragment = Nokogiri::XML::DocumentFragment.new(@xml, "") - assert fragment.children.respond_to?(:awesome!), fragment.children.class - end - def test_add_node_to_doc_fragment_segfault - frag = Nokogiri::XML::DocumentFragment.new(@xml, '

hello world

') - Nokogiri::XML::Comment.new(frag,'moo') - end + def test_add_node_to_doc_fragment_segfault + frag = Nokogiri::XML::DocumentFragment.new(xml, '

hello world

') + Nokogiri::XML::Comment.new(frag, 'moo') + end - def test_issue_1077_parsing_of_frozen_strings - input = <<-EOS - - - - -EOS - input.freeze + def test_issue_1077_parsing_of_frozen_strings + input = <<~EOS + + + + + EOS + input.freeze - Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised - end + Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised + end + + if Nokogiri.uses_libxml? + def test_dup_should_exist_in_a_new_document + # https://github.com/sparklemotion/nokogiri/issues/1063 + original = Nokogiri::XML::DocumentFragment.parse("

hello

") + duplicate = original.dup + assert_not_equal(original.document, duplicate.document) + end + end - if Nokogiri.uses_libxml? - def test_dup_should_exist_in_a_new_document - # https://github.com/sparklemotion/nokogiri/issues/1063 + def test_dup_should_create_an_xml_document_fragment + # https://github.com/sparklemotion/nokogiri/issues/1846 original = Nokogiri::XML::DocumentFragment.parse("

hello

") duplicate = original.dup - assert_not_equal original.document, duplicate.document + assert_instance_of(Nokogiri::XML::DocumentFragment, duplicate) end - end - - def test_dup_should_create_an_xml_document_fragment - # https://github.com/sparklemotion/nokogiri/issues/1846 - original = Nokogiri::XML::DocumentFragment.parse("

hello

") - duplicate = original.dup - assert_instance_of Nokogiri::XML::DocumentFragment, duplicate - end - def test_dup_creates_tree_with_identical_structure - original = Nokogiri::XML::DocumentFragment.parse("

hello

") - duplicate = original.dup - assert_equal original.to_html, duplicate.to_html - end + def test_dup_creates_tree_with_identical_structure + original = Nokogiri::XML::DocumentFragment.parse("

hello

") + duplicate = original.dup + assert_equal(original.to_html, duplicate.to_html) + end - def test_dup_creates_mutable_tree - original = Nokogiri::XML::DocumentFragment.parse("

hello

") - duplicate = original.dup - duplicate.at_css("div").add_child("hello there") - assert_nil original.at_css("b") - assert_not_nil duplicate.at_css("b") - end + def test_dup_creates_mutable_tree + original = Nokogiri::XML::DocumentFragment.parse("

hello

") + duplicate = original.dup + duplicate.at_css("div").add_child("hello there") + assert_nil(original.at_css("b")) + assert_not_nil(duplicate.at_css("b")) + end - if Nokogiri.uses_libxml? - def test_for_libxml_in_context_fragment_parsing_bug_workaround - 10.times do - begin + if Nokogiri.uses_libxml? + def test_for_libxml_in_context_fragment_parsing_bug_workaround + 10.times do fragment = Nokogiri::XML.fragment("
") parent = fragment.children.first child = parent.parse("

").first - parent.add_child child + parent.add_child(child) + + GC.start end - GC.start + end + + def test_for_libxml_in_context_memory_badness_when_encountering_encoding_errors + # see issue #643 for background + # this test exists solely to raise an error during valgrind test runs. + html = <<~EOHTML + + + + + +
Foo
+ + + EOHTML + doc = Nokogiri::HTML(html) + doc.at_css("div").replace("Bar") end end - def test_for_libxml_in_context_memory_badness_when_encountering_encoding_errors - # see issue #643 for background - # this test exists solely to raise an error during valgrind test runs. - html = <<-EOHTML - - - - - -
Foo
- - -EOHTML - doc = Nokogiri::HTML html - doc.at_css("div").replace("Bar") + describe "subclassing" do + let(:klass) do + Class.new(Nokogiri::XML::DocumentFragment) do + attr_accessor :initialized_with, :initialized_count + + def initialize(*args) + super + @initialized_with = args + @initialized_count ||= 0 + @initialized_count += 1 + end + end + end + + describe ".new" do + it "returns an instance of the right class" do + fragment = klass.new(xml, "
a
") + assert_instance_of(klass, fragment) + end + + it "calls #initialize exactly once" do + fragment = klass.new(xml, "
a
") + assert_equal(1, fragment.initialized_count) + end + + it "passes args to #initialize" do + fragment = klass.new(xml, "
a
") + assert_equal([xml, "
a
"], fragment.initialized_with) + end + end + + it "#dup returns the expected class" do + doc = klass.new(xml, "
a
").dup + assert_instance_of(klass, doc) + end + + describe ".parse" do + it "returns an instance of the right class" do + fragment = klass.parse("
a
") + assert_instance_of(klass, fragment) + end + + it "calls #initialize exactly once" do + fragment = klass.parse("
a
") + assert_equal(1, fragment.initialized_count) + end + + it "passes the fragment" do + fragment = klass.parse("
a
") + assert_equal(Nokogiri::XML::DocumentFragment.parse("
a
").to_s, fragment.to_s) + end + end end end end From e5646e813a5aeb2bbdbcebb807643ef84bf9af70 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 15 Jan 2021 08:04:27 -0500 Subject: [PATCH 2/5] fix(jruby): call Document#initialize when creating via .parse This was noticed while debugging some Loofah behavior that relied on overriding `#initialize` to decorate nodes. Related to https://github.com/flavorjones/loofah/issues/88 --- ext/java/nokogiri/internals/HtmlDomParserContext.java | 3 +++ ext/java/nokogiri/internals/XmlDomParserContext.java | 2 ++ 2 files changed, 5 insertions(+) diff --git a/ext/java/nokogiri/internals/HtmlDomParserContext.java b/ext/java/nokogiri/internals/HtmlDomParserContext.java index 71a2bacdef..69c1d47d66 100644 --- a/ext/java/nokogiri/internals/HtmlDomParserContext.java +++ b/ext/java/nokogiri/internals/HtmlDomParserContext.java @@ -52,6 +52,7 @@ import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.Helpers; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; @@ -133,6 +134,8 @@ public XmlDocument parse(ThreadContext context, RubyClass klass, IRubyObject url protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) { HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document); htmlDocument.setDocumentNode(context.runtime, document); + Helpers.invoke(context, htmlDocument, "initialize"); + if (ruby_encoding.isNil()) { // ruby_encoding might have detected by HtmlDocument::EncodingReader if (detected_encoding != null && !detected_encoding.isNil()) { diff --git a/ext/java/nokogiri/internals/XmlDomParserContext.java b/ext/java/nokogiri/internals/XmlDomParserContext.java index d400faae0c..d993047fc7 100644 --- a/ext/java/nokogiri/internals/XmlDomParserContext.java +++ b/ext/java/nokogiri/internals/XmlDomParserContext.java @@ -46,6 +46,7 @@ import org.jruby.RubyFixnum; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.Helpers; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -213,6 +214,7 @@ private XmlDocument getInterruptedOrNewXmlDocument(ThreadContext context, RubyCl */ protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document doc) { XmlDocument xmlDocument = new XmlDocument(context.runtime, klass, doc); + Helpers.invoke(context, xmlDocument, "initialize"); xmlDocument.setEncoding(ruby_encoding); if (options.dtdLoad) { From 858797219259063bdca75e0330ef4f0726b7f40d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 17 Jan 2021 11:47:46 -0500 Subject: [PATCH 3/5] cleanup: rename internal method to nokogiri_xml_document_wrap This is a prefactor for the upcoming commit. --- ext/nokogiri/html_document.c | 6 +++--- ext/nokogiri/xml_document.c | 10 +++++----- ext/nokogiri/xml_document.h | 2 +- ext/nokogiri/xslt_stylesheet.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ext/nokogiri/html_document.c b/ext/nokogiri/html_document.c index 16fdb4d775..8c5d39c6bf 100644 --- a/ext/nokogiri/html_document.c +++ b/ext/nokogiri/html_document.c @@ -23,7 +23,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass) RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL, RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL ); - rb_doc = Nokogiri_wrap_xml_document(klass, doc); + rb_doc = nokogiri_xml_document_wrap(klass, doc); rb_obj_call_init(rb_doc, argc, argv); return rb_doc ; } @@ -81,7 +81,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco return Qnil; } - rb_doc = Nokogiri_wrap_xml_document(klass, c_doc); + rb_doc = nokogiri_xml_document_wrap(klass, c_doc); rb_iv_set(rb_doc, "@errors", rb_error_list); return rb_doc; } @@ -129,7 +129,7 @@ rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE r return Qnil; } - rb_doc = Nokogiri_wrap_xml_document(klass, c_doc); + rb_doc = nokogiri_xml_document_wrap(klass, c_doc); rb_iv_set(rb_doc, "@errors", rb_error_list); return rb_doc; } diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index 6c010b5438..cc6276db52 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -278,7 +278,7 @@ static VALUE read_io( VALUE klass, return Qnil; } - document = Nokogiri_wrap_xml_document(klass, doc); + document = nokogiri_xml_document_wrap(klass, doc); rb_iv_set(document, "@errors", error_list); return document; } @@ -322,7 +322,7 @@ static VALUE read_memory( VALUE klass, return Qnil; } - document = Nokogiri_wrap_xml_document(klass, doc); + document = nokogiri_xml_document_wrap(klass, doc); rb_iv_set(document, "@errors", error_list); return document; } @@ -351,7 +351,7 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self) if(dup == NULL) return Qnil; dup->type = doc->type; - copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup); + copy = nokogiri_xml_document_wrap(rb_obj_class(self), dup); error_list = rb_iv_get(self, "@errors"); rb_iv_set(copy, "@errors", error_list); return copy ; @@ -373,7 +373,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) if (NIL_P(version)) version = rb_str_new2("1.0"); doc = xmlNewDoc((xmlChar *)StringValueCStr(version)); - rb_doc = Nokogiri_wrap_xml_document(klass, doc); + rb_doc = nokogiri_xml_document_wrap(klass, doc); rb_obj_call_init(rb_doc, argc, argv); return rb_doc ; } @@ -596,7 +596,7 @@ void init_xml_document() /* this takes klass as a param because it's used for HtmlDocument, too. */ -VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc) +VALUE nokogiri_xml_document_wrap(VALUE klass, xmlDocPtr doc) { nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple)); diff --git a/ext/nokogiri/xml_document.h b/ext/nokogiri/xml_document.h index 48353a3e97..c4bb6dfee0 100644 --- a/ext/nokogiri/xml_document.h +++ b/ext/nokogiri/xml_document.h @@ -12,7 +12,7 @@ typedef struct _nokogiriTuple nokogiriTuple; typedef nokogiriTuple * nokogiriTuplePtr; void init_xml_document(); -VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); +VALUE nokogiri_xml_document_wrap(VALUE klass, xmlDocPtr doc); #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private)) #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc) diff --git a/ext/nokogiri/xslt_stylesheet.c b/ext/nokogiri/xslt_stylesheet.c index 73b975044e..8894d02d98 100644 --- a/ext/nokogiri/xslt_stylesheet.c +++ b/ext/nokogiri/xslt_stylesheet.c @@ -173,7 +173,7 @@ static VALUE transform(int argc, VALUE* argv, VALUE self) rb_exc_raise(exception); } - return Nokogiri_wrap_xml_document((VALUE)0, result) ; + return nokogiri_xml_document_wrap((VALUE)0, result) ; } static void method_caller(xmlXPathParserContextPtr ctxt, int nargs) From c6dfeb93bdd498ffb0a97369839eaf8073446b8a Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 17 Jan 2021 12:09:58 -0500 Subject: [PATCH 4/5] fix(cruby): ensure Document #initialize is called exactly once Previously #initialize was being called twice from Document.parse --- ext/nokogiri/html_document.c | 3 +- ext/nokogiri/xml_document.c | 67 +++++++++++++++++++----------------- ext/nokogiri/xml_document.h | 1 + 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/ext/nokogiri/html_document.c b/ext/nokogiri/html_document.c index 8c5d39c6bf..bfed30f7aa 100644 --- a/ext/nokogiri/html_document.c +++ b/ext/nokogiri/html_document.c @@ -23,8 +23,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass) RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL, RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL ); - rb_doc = nokogiri_xml_document_wrap(klass, doc); - rb_obj_call_init(rb_doc, argc, argv); + rb_doc = nokogiri_xml_document_wrap_with_init_args(klass, doc, argc, argv); return rb_doc ; } diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index cc6276db52..4ed3234cde 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -339,7 +339,6 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self) xmlDocPtr doc, dup; VALUE copy; VALUE level; - VALUE error_list; if(rb_scan_args(argc, argv, "01", &level) == 0) level = INT2NUM((long)1); @@ -352,8 +351,7 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self) dup->type = doc->type; copy = nokogiri_xml_document_wrap(rb_obj_class(self), dup); - error_list = rb_iv_get(self, "@errors"); - rb_iv_set(copy, "@errors", error_list); + rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors")); return copy ; } @@ -373,8 +371,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass) if (NIL_P(version)) version = rb_str_new2("1.0"); doc = xmlNewDoc((xmlChar *)StringValueCStr(version)); - rb_doc = nokogiri_xml_document_wrap(klass, doc); - rb_obj_call_init(rb_doc, argc, argv); + rb_doc = nokogiri_xml_document_wrap_with_init_args(klass, doc, argc, argv); return rb_doc ; } @@ -564,6 +561,39 @@ static VALUE nokogiri_xml_document_canonicalize(int argc, VALUE* argv, VALUE sel return rb_funcall(io, rb_intern("string"), 0); } +VALUE nokogiri_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv) +{ + nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple)); + + VALUE rb_doc = Data_Wrap_Struct( + klass ? klass : cNokogiriXmlDocument, + mark, + dealloc, + doc + ); + + VALUE cache = rb_ary_new(); + rb_iv_set(rb_doc, "@decorators", Qnil); + rb_iv_set(rb_doc, "@errors", Qnil); + rb_iv_set(rb_doc, "@node_cache", cache); + + tuple->doc = rb_doc; + tuple->unlinkedNodes = st_init_numtable_with_size(128); + tuple->node_cache = cache; + doc->_private = tuple ; + + rb_obj_call_init(rb_doc, argc, argv); + + return rb_doc ; +} + + +VALUE nokogiri_xml_document_wrap(VALUE klass, xmlDocPtr doc) +{ + return nokogiri_xml_document_wrap_with_init_args(klass, doc, 0, NULL); +} + + VALUE cNokogiriXmlDocument ; void init_xml_document() { @@ -593,30 +623,3 @@ void init_xml_document() rb_define_method(klass, "create_entity", create_entity, -1); rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0); } - - -/* this takes klass as a param because it's used for HtmlDocument, too. */ -VALUE nokogiri_xml_document_wrap(VALUE klass, xmlDocPtr doc) -{ - nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple)); - - VALUE rb_doc = Data_Wrap_Struct( - klass ? klass : cNokogiriXmlDocument, - mark, - dealloc, - doc - ); - - VALUE cache = rb_ary_new(); - rb_iv_set(rb_doc, "@decorators", Qnil); - rb_iv_set(rb_doc, "@node_cache", cache); - - tuple->doc = rb_doc; - tuple->unlinkedNodes = st_init_numtable_with_size(128); - tuple->node_cache = cache; - doc->_private = tuple ; - - rb_obj_call_init(rb_doc, 0, NULL); - - return rb_doc ; -} diff --git a/ext/nokogiri/xml_document.h b/ext/nokogiri/xml_document.h index c4bb6dfee0..4a83ad85e7 100644 --- a/ext/nokogiri/xml_document.h +++ b/ext/nokogiri/xml_document.h @@ -12,6 +12,7 @@ typedef struct _nokogiriTuple nokogiriTuple; typedef nokogiriTuple * nokogiriTuplePtr; void init_xml_document(); +VALUE nokogiri_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv); VALUE nokogiri_xml_document_wrap(VALUE klass, xmlDocPtr doc); #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private)) From 30c3a57a45973109f13a770d0dfbeb9b7ff46d99 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 17 Jan 2021 12:44:57 -0500 Subject: [PATCH 5/5] update CHANGELOG.md --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 215e05ae44..2a6621c86b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## next / unreleased + +### Fixed + +* [CRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was invoked twice on each object. +* [JRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was not called, which was a problem for subclassing such as done by `Loofah`. + + ## v1.11.1 / 2021-01-06 ### Fixed