Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace S3 dispatch by C implementation #400

Merged
merged 24 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,14 @@ xml2_example <- function(path = NULL) {
system.file("extdata", path, package = "xml2", mustWork = TRUE)
}
}

sample_nodeset <- function() {
x <- read_xml("<body>
<p>Some <b>text</b>.</p>
<p>Some <i>other</i>.</p>
<p>No bold text</p>
</body>")

children <- xml_children(x)
xml_find_first(children, ".//b|.//i")
}
2 changes: 1 addition & 1 deletion R/xml_name.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ xml_name.xml_missing <- function(x, ns = character()) {

#' @export
xml_name.xml_nodeset <- function(x, ns = character()) {
vapply(x, xml_name, ns = ns, FUN.VALUE = character(1))
.Call(nodeset_name, x, ns)
}

#' @export
Expand Down
19 changes: 14 additions & 5 deletions R/xml_text.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,24 @@ xml_text.xml_missing <- function(x, trim = FALSE) {
xml_text.xml_node <- function(x, trim = FALSE) {
res <- .Call(node_text, x$node)
if (isTRUE(trim)) {
res <- sub("^[[:space:]\u00a0]+", "", res)
res <- sub("[[:space:]\u00a0]+$", "", res)
res <- trim_text(res)
}
res
}

#' @export
xml_text.xml_nodeset <- function(x, trim = FALSE) {
vapply(x, xml_text, trim = trim, FUN.VALUE = character(1))
res <- .Call(nodeset_text, x)

if (isTRUE(trim)) {
res <- trim_text(res)
}
res
}

trim_text <- function(x) {
x <- sub("^[[:space:]\u00a0]+", "", x)
sub("[[:space:]\u00a0]+$", "", x)
}

#' @rdname xml_text
Expand Down Expand Up @@ -109,7 +118,7 @@ xml_double.xml_node <- function(x) {

#' @export
xml_double.xml_nodeset <- function(x) {
vapply(x, xml_double, numeric(1))
as.numeric(xml_text(x))
hadley marked this conversation as resolved.
Show resolved Hide resolved
}

#' @export
Expand All @@ -135,5 +144,5 @@ xml_integer.xml_node <- function(x) {

#' @export
xml_integer.xml_nodeset <- function(x) {
vapply(x, xml_integer, integer(1))
as.integer(xml_text(x))
}
2 changes: 1 addition & 1 deletion R/xml_type.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ xml_type.xml_node <- function(x) {

#' @export
xml_type.xml_nodeset <- function(x) {
types <- vapply(x, function(x) .Call(node_type, x$node), integer(1))
types <- .Call(nodeset_type, x)
xmlElementType[types]
}

Expand Down
6 changes: 6 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ extern SEXP node_copy(SEXP);
extern SEXP node_has_children(SEXP, SEXP);
extern SEXP node_length(SEXP, SEXP);
extern SEXP node_name(SEXP, SEXP);
extern SEXP nodeset_name(SEXP, SEXP);
extern SEXP node_new(SEXP);
extern SEXP node_new_dtd(SEXP, SEXP, SEXP, SEXP);
extern SEXP node_new_ns(SEXP, SEXP);
Expand All @@ -57,7 +58,9 @@ extern SEXP node_set_namespace_prefix(SEXP, SEXP, SEXP);
extern SEXP node_set_namespace_uri(SEXP, SEXP, SEXP);
extern SEXP node_siblings(SEXP, SEXP);
extern SEXP node_text(SEXP);
extern SEXP nodeset_text(SEXP);
extern SEXP node_type(SEXP);
extern SEXP nodeset_type(SEXP);
extern SEXP node_write_character(SEXP, SEXP, SEXP);
extern SEXP node_write_connection(SEXP, SEXP, SEXP, SEXP);
extern SEXP node_write_file(SEXP, SEXP, SEXP, SEXP);
Expand Down Expand Up @@ -103,6 +106,7 @@ static const R_CallMethodDef CallEntries[] = {
{"node_has_children", (DL_FUNC) &node_has_children, 2},
{"node_length", (DL_FUNC) &node_length, 2},
{"node_name", (DL_FUNC) &node_name, 2},
{"nodeset_name", (DL_FUNC) &nodeset_name, 2},
{"node_new", (DL_FUNC) &node_new, 1},
{"node_new_dtd", (DL_FUNC) &node_new_dtd, 4},
{"node_new_ns", (DL_FUNC) &node_new_ns, 2},
Expand All @@ -122,7 +126,9 @@ static const R_CallMethodDef CallEntries[] = {
{"node_set_namespace_uri", (DL_FUNC) &node_set_namespace_uri, 3},
{"node_siblings", (DL_FUNC) &node_siblings, 2},
{"node_text", (DL_FUNC) &node_text, 1},
{"nodeset_text", (DL_FUNC) &nodeset_text, 1},
{"node_type", (DL_FUNC) &node_type, 1},
{"nodeset_type", (DL_FUNC) &nodeset_type, 1},
{"node_write_character", (DL_FUNC) &node_write_character, 3},
{"node_write_connection", (DL_FUNC) &node_write_connection, 4},
{"node_write_file", (DL_FUNC) &node_write_file, 4},
Expand Down
90 changes: 90 additions & 0 deletions src/xml2_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,36 @@ extern "C" SEXP node_name(SEXP node_sxp, SEXP nsMap) {
END_CPP
}

// [[export]]
extern "C" SEXP nodeset_name(SEXP node_sxp, SEXP nsMap) {
BEGIN_CPP

int n = Rf_xlength(node_sxp);

SEXP out = PROTECT(Rf_allocVector(STRSXP, n));

for (int i = 0; i < n; ++i) {
SEXP node_sxp_i = VECTOR_ELT(node_sxp, i);

if (Rf_inherits(node_sxp_i, "xml_node")) {
mgirlich marked this conversation as resolved.
Show resolved Hide resolved
SEXP node_field_i = VECTOR_ELT(node_sxp_i, 0);
XPtrNode node_i(node_field_i);
std::string name_i = nodeName(node_i.checked_get(), nsMap);
SET_STRING_ELT(out, i, Rf_mkCharLenCE(name_i.c_str(), name_i.size(), CE_UTF8));
} else if (Rf_inherits(node_sxp_i, "xml_missing")) {
SET_STRING_ELT(out, i, NA_STRING);
} else {
// xml_nodeset can't appear
Rf_error("Unexpected node type");
}
}

UNPROTECT(1);
return out;

END_CPP
}

// [[export]]
extern "C" SEXP node_set_name(SEXP node_sxp, SEXP value) {
BEGIN_CPP
Expand All @@ -57,6 +87,36 @@ extern "C" SEXP node_text(SEXP node_sxp) {
END_CPP
}

// [[export]]
extern "C" SEXP nodeset_text(SEXP node_sxp) {
BEGIN_CPP

int n = Rf_xlength(node_sxp);

SEXP out = PROTECT(Rf_allocVector(STRSXP, n));

for (int i = 0; i < n; ++i) {
SEXP node_sxp_i = VECTOR_ELT(node_sxp, i);

if (Rf_inherits(node_sxp_i, "xml_node")) {
SEXP node_field_i = VECTOR_ELT(node_sxp_i, 0);
XPtrNode node_i(node_field_i);
SEXP text_i = Xml2String(xmlNodeGetContent(node_i.checked_get())).asRString();
SET_STRING_ELT(out, i, text_i);
} else if (Rf_inherits(node_sxp_i, "xml_missing")) {
SET_STRING_ELT(out, i, NA_STRING);
} else {
// xml_nodeset can't appear
Rf_error("Unexpected node type");
}
}

UNPROTECT(1);
return out;

END_CPP
}

bool hasPrefix(std::string lhs, std::string rhs) {
if (lhs.length() > rhs.length()) {
return false;
Expand Down Expand Up @@ -584,6 +644,36 @@ extern "C" SEXP node_type(SEXP node_sxp) {
END_CPP
}

// [[export]]
extern "C" SEXP nodeset_type(SEXP node_sxp) {
BEGIN_CPP

int n = Rf_xlength(node_sxp);

SEXP out = PROTECT(Rf_allocVector(INTSXP, n));

for (int i = 0; i < n; ++i) {
SEXP node_sxp_i = VECTOR_ELT(node_sxp, i);

if (Rf_inherits(node_sxp_i, "xml_node")) {
SEXP node_field_i = VECTOR_ELT(node_sxp_i, 0);
XPtrNode node_i(node_field_i);
INTEGER(out)[i] = node_i->type;
} else if (Rf_inherits(node_sxp_i, "xml_missing")) {
INTEGER(out)[i] = NA_INTEGER;
} else {
// xml_nodeset can't appear
Rf_error("Unexpected node type");
}
}

UNPROTECT(1);
return out;

END_CPP
}


// [[export]]
extern "C" SEXP node_copy(SEXP node_sxp) {
BEGIN_CPP
Expand Down
4 changes: 4 additions & 0 deletions tests/testthat/_snaps/xml_name.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# error if missing ns spec

Couldn't find prefix for url http://bar.com

1 change: 0 additions & 1 deletion tests/testthat/test-xml_missing.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ test_that("xml_missing methods return properly for all S3 methods", {
expect_equal(xml_parent(mss), xml_missing())
expect_equal(xml_path(mss), NA_character_)
expect_equal(xml_text(mss), NA_character_)
expect_equal(xml_type(mss), NA_character_)
expect_equal(xml_url(mss), NA_character_)
})

Expand Down
12 changes: 11 additions & 1 deletion tests/testthat/test-xml_name.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
test_that("xml_name() returns the name", {
x <- sample_nodeset()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it would be better to copy and paste the contents of sample_nodeset() here; otherwise it's hard to verify that the test is correct.


expect_equal(xml_name(x[[1]]), "b")
expect_equal(xml_name(x[[2]]), "i")
expect_equal(xml_name(x[[3]]), NA_character_)

expect_equal(xml_name(x), c("b", "i", NA_character_))
})

test_that("qualified names returned when ns given", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)
Expand All @@ -12,7 +22,7 @@ test_that("error if missing ns spec", {
ns <- xml_ns(x)[1]

bars <- xml_children(xml_children(x))
expect_error(xml_name(bars, ns), "Couldn't find prefix")
expect_snapshot_error(xml_name(bars, ns))
})

test_that("xml_name<- modifies the name", {
Expand Down
1 change: 0 additions & 1 deletion tests/testthat/test-xml_nodeset.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ test_that("methods work on empty nodesets", {
expect_output(xml_structure(empty), NA)

expect_identical(xml_text(empty), character())
expect_identical(xml_type(empty), character())
expect_identical(xml_url(empty), character())
})

Expand Down
22 changes: 8 additions & 14 deletions tests/testthat/test-xml_text.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,10 @@ test_that("xml_text returns only text without markup", {
expect_identical(xml_text(xml_children(x)), "bold!")
})

test_that("xml_text returns only text without markup", {
x <- read_xml("<p>This is some text. This is <b>bold!</b></p>")

expect_identical(xml_text(x), "This is some text. This is bold!")

expect_identical(xml_text(xml_children(x)), "bold!")
})

test_that("xml_text works properly with xml_nodeset objects", {
x <- read_xml("<x>This is some text. <x>This is some nested text.</x></x>")

expect_identical(xml_text(x), "This is some text. This is some nested text.")

expect_identical(
xml_text(xml_find_all(x, "//x")),
c("This is some text. This is some nested text.", "This is some nested text.")
xml_text(sample_nodeset()),
c("text", "other", NA)
)
})

Expand All @@ -47,6 +35,12 @@ test_that("xml_text trims whitespace if requested, including non-breaking spaces
xml_text(x, trim = TRUE),
"Some text \u20ac"
)

x2 <- read_html("<body><p> Some text &euro; &nbsp;</p><p> and more &euro; text &nbsp;</body>")
expect_identical(
xml_text(xml_find_all(x2, ".//p"), trim = TRUE),
c("Some text \u20ac", "and more \u20ac text")
)
})

test_that("xml_integer() returns an integer vector", {
Expand Down
11 changes: 11 additions & 0 deletions tests/testthat/test-xml_type.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
test_that("multiplication works", {
mgirlich marked this conversation as resolved.
Show resolved Hide resolved
x <- sample_nodeset()

expect_equal(xml_type(x[[1]]), "element")
expect_equal(xml_type(x[[3]]), NA_character_)

expect_equal(xml_type(x), c("element", "element", NA))

empty <- xml_children(x)
expect_identical(xml_type(empty), character())
})
Loading