Skip to content

Commit

Permalink
Check for duplicate names when supplying character literals (#367)
Browse files Browse the repository at this point in the history
Closes #346
  • Loading branch information
lionel- authored Oct 28, 2024
1 parent f2c9682 commit 4005748
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 32 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

* `matches()` now uses `perl = TRUE` by default. This makes it more consitent with regular expressions in stringr (#330).

* `eval_select()` now fails when data has duplicate names and a character vector is provided as input (#346).

* `eval_select()` and `eval_relocate()` gain a new `error_arg` argument that can be specified to throw a better error message when `allow_empty = FALSE` or `allow_rename = FALSE` (@olivroy, #327).

* `vars_pull()` now also warns when using `.data` (#335). Please
Expand Down
55 changes: 24 additions & 31 deletions R/eval-walk.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,12 @@ vars_select_eval <- function(vars,
return(pos)
}

uniquely_named <- uniquely_named %||% is.data.frame(data)

if (!is_symbolic(wrapped)) {
pos <- as_indices_sel_impl(
wrapped,
vars = vars,
strict = strict,
data = data,
allow_predicates = allow_predicates,
call = error_call
)
pos <- loc_validate(pos, vars, call = error_call)
pos <- ensure_named(
pos,
vars,
uniquely_named = uniquely_named,
allow_rename = allow_rename,
allow_empty = allow_empty,
error_arg = error_arg,
call = error_call
)
return(pos)
}

vars <- peek_vars()

vars_split <- vctrs::vec_split(seq_along(vars), vars)

# Mark data duplicates so we can fail instead of disambiguating them
# when renaming
uniquely_named <- uniquely_named %||% is.data.frame(data)
if (uniquely_named) {
vars_split$val <- map(vars_split$val, mark_data_dups)
}
Expand Down Expand Up @@ -177,7 +153,7 @@ walk_data_tree <- function(expr, data_mask, context_mask, colon = FALSE) {

out <- switch(
expr_kind(expr, context_mask, error_call),
literal = expr,
literal = eval_literal(expr, data_mask, context_mask),
symbol = eval_sym(expr, data_mask, context_mask),
`(` = walk_data_tree(expr[[2]], data_mask, context_mask, colon = colon),
`!` = eval_bang(expr, data_mask, context_mask),
Expand Down Expand Up @@ -301,11 +277,6 @@ chr_as_locations <- function(x, vars, call = caller_env(), arg = NULL) {
set_names(out, names(x))
}

as_indices <- function(x, vars, strict = TRUE, call) {
inds <- with_subscript_errors(as_indices_impl(x, vars, strict, call))
vctrs::vec_as_location(inds, length(vars), vars, convert_values = NULL)
}

expr_kind <- function(expr, context_mask, error_call) {
switch(
typeof(expr),
Expand Down Expand Up @@ -347,6 +318,28 @@ call_kind <- function(expr, context_mask, error_call) {
)
}

eval_literal <- function(expr, data_mask, context_mask) {
internal <- data_mask$.__tidyselect__.$internal

if (internal$uniquely_named && is_character(expr)) {
# Since tidyselect allows repairing data frames with duplicate names by
# renaming or selecting positions, we can't check the input for duplicates.
# Instead, we check the output. But in case of character literals, checking
# the output doesn't work because we use `vctrs::vec_as_location()` to
# transform the strings to locations and it ignores duplicate names. So we
# instead check the input here, since it's not possible to repair duplicate
# names by matching them by name. This avoids an inconsistency with the
# symbolic path (#346).
vctrs::vec_as_names(
internal$vars,
repair = "check_unique",
call = internal$error_call
)
}

expr
}

eval_colon <- function(expr, data_mask, context_mask) {
if (is_negated_colon(expr)) {
# Compatibility syntax for `-1:-2`. We interpret it as `-(1:2)`.
Expand Down
1 change: 0 additions & 1 deletion R/vars.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

peeker <- function(what) {
function(..., fn = NULL) {
if (!missing(...)) {
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/_snaps/eval-walk.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,34 @@
Error:
! Must select at least one item.

# duplicate names are checked when literals are supplied (#346)

Code
select_loc(df, "x")
Condition <vctrs_error_names_must_be_unique>
Error in `select_loc()`:
! Names must be unique.
x These names are duplicated:
* "x" at locations 1 and 2.
Code
select_loc(df, c("x"))
Condition <vctrs_error_names_must_be_unique>
Error in `select_loc()`:
! Names must be unique.
x These names are duplicated:
* "x" at locations 1 and 2.
Code
select_loc(df, c(!!1:2))
Condition <vctrs_error_names_must_be_unique>
Error in `select_loc()`:
! Names must be unique.
x These names are duplicated:
* "x" at locations 1 and 2.
Code
select_loc(df, !!(1:2))
Condition <vctrs_error_names_must_be_unique>
Error in `select_loc()`:
! Names must be unique.
x These names are duplicated:
* "x" at locations 1 and 2.

19 changes: 19 additions & 0 deletions tests/testthat/test-eval-walk.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,22 @@ test_that("can make empty selection with allow_rename = FALSE", {
c(cyl = 2L, am = 9L)
)
})

test_that("duplicate names are checked when literals are supplied (#346)", {
df <- set_names(data.frame(x = 1, x = 1), c("x", "x"))
expect_snapshot(
error = TRUE,
cnd_class = TRUE,
{
select_loc(df, "x")
select_loc(df, c("x"))

select_loc(df, c(!!1:2))
select_loc(df, !!(1:2))
}
)

# In these cases the selection manages to repair the data frame
expect_equal(select_loc(df, 1), c(x = 1L))
expect_equal(select_loc(df, 2), c(x = 2L))
})

0 comments on commit 4005748

Please sign in to comment.