Merge pull request #583 from olivroy/testing

Simplify dplyr code
rstudio · Dec 4, 2024 · 29ad2ec · 29ad2ec
2 parents b5b880b + 3ee55a4
commit 29ad2ec
Show file tree

Hide file tree

Showing 7 changed files with 41 additions and 49 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,7 +16,8 @@ Authors@R: c(
     person("Mauricio", "Vargas", , "[email protected]", c("aut"),
            comment = c(ORCID = "0000-0003-1017-7574")),
     person("June", "Choe", , "[email protected]", c("aut"),
-           comment = c(ORCID = "0000-0002-0701-921X"))
+           comment = c(ORCID = "0000-0002-0701-921X")),
+    person("Olivier", "Roy", role = c("ctb"))
     )
 License: MIT + file LICENSE
 URL: https://rstudio.github.io/pointblank/, https://github.com/rstudio/pointblank

diff --git a/R/get_sundered_data.R b/R/get_sundered_data.R
@@ -279,7 +279,7 @@ get_sundered_data <- function(
   # Get the row count of the input table
   row_count_input_tbl <-
     input_tbl %>%
-    dplyr::summarize(n = dplyr::n()) %>%
+    dplyr::count() %>%
     dplyr::pull(n) %>%
     as.numeric()
 
@@ -289,15 +289,15 @@ get_sundered_data <- function(
   # - are `active`
   validation_set_prefiltered <-
     agent$validation_set %>%
-    dplyr::filter(eval_error == FALSE) %>%
     dplyr::filter(
+      !eval_error,
       assertion_type %in%
         base::setdiff(
           row_based_validation_fns_vec(),
           c("rows_distinct", "col_vals_make_set", "col_vals_make_subset")
-        )
-    ) %>%
-    dplyr::filter(active == TRUE)
+        ),
+      active == TRUE
+    )
 
   # Get a character vector of preconditions
   preconditions_vec <-

diff --git a/R/interrogate.R b/R/interrogate.R
@@ -2414,9 +2414,7 @@ interrogate_distinct <- function(
     unduplicated <-
       table %>%
       dplyr::select({{ column_names }}) %>%
-      dplyr::group_by(!!!col_syms) %>%
-      dplyr::summarize(`pb_is_good_` = dplyr::n()) %>%
-      dplyr::ungroup() %>%
+      dplyr::count(!!!col_syms, name = "pb_is_good_") %>%
       dplyr::mutate(`pb_is_good_` = ifelse(`pb_is_good_` == 1, TRUE, FALSE)) %>%
       dplyr::filter(`pb_is_good_` == TRUE)
 
@@ -3001,7 +2999,7 @@ add_reporting_data <- function(
   # Get total count of rows
   row_count <-
     tbl_checked %>%
-    dplyr::summarize(n = dplyr::n()) %>%
+    dplyr::count() %>%
     dplyr::pull(n) %>%
     as.numeric()
 
@@ -3016,7 +3014,7 @@ add_reporting_data <- function(
     n_passed <-
       tbl_checked %>%
       dplyr::filter(pb_is_good_ == 1) %>%
-      dplyr::summarize(n = dplyr::n()) %>%
+      dplyr::count() %>%
       dplyr::pull(n) %>%
       as.numeric()
 
@@ -3027,7 +3025,7 @@ add_reporting_data <- function(
     n_passed <-
       tbl_checked %>%
       dplyr::filter(pb_is_good_ == TRUE) %>%
-      dplyr::summarize(n = dplyr::n()) %>%
+      dplyr::count() %>%
       dplyr::pull(n) %>%
       as.numeric()
   }
@@ -3043,7 +3041,7 @@ add_reporting_data <- function(
     n_failed <-
       tbl_checked %>%
       dplyr::filter(pb_is_good_ == 0) %>%
-      dplyr::summarize(n = dplyr::n()) %>%
+      dplyr::count() %>%
       dplyr::pull(n) %>%
       as.numeric()
 
@@ -3054,7 +3052,7 @@ add_reporting_data <- function(
     n_failed <-
       tbl_checked %>%
       dplyr::filter(pb_is_good_ == FALSE) %>%
-      dplyr::summarize(n = dplyr::n()) %>%
+      dplyr::count() %>%
       dplyr::pull(n) %>%
       as.numeric()
   }

diff --git a/R/scan_data.R b/R/scan_data.R
@@ -338,16 +338,14 @@ probe_overview_stats <- function(
 
   r_col_types_tbl <-
     dplyr::tibble(r_col_types = r_col_types) %>%
-    dplyr::group_by(r_col_types) %>%
-    dplyr::summarize(count = dplyr::n()) %>%
-    dplyr::arrange(dplyr::desc(count)) %>%
+    dplyr::count(r_col_types, name = "count", sort = TRUE) %>%
     utils::head(6E8)
 
   data_overview_gt <-
-    gt::gt(data_overview_tbl) %>%
+    gt::gt(data_overview_tbl, locale = locale) %>%
     gt::fmt_markdown(columns = "label") %>%
-    gt::fmt_number(columns = "value", decimals = 0, locale = locale) %>%
-    gt::fmt_percent(columns = "pct", decimals = 2, locale = locale) %>%
+    gt::fmt_integer(columns = "value") %>%
+    gt::fmt_percent(columns = "pct", decimals = 2) %>%
     gt::cols_merge(columns = c("value", "pct"), pattern = "{1} ({2})") %>%
     gt::cols_align(align = "right", columns = "value") %>%
     gt::text_transform(
@@ -684,8 +682,7 @@ get_descriptive_stats_gt <- function(
 
     descriptive_stats <-
       dplyr::tibble(mean = mean, variance = variance, sd = sd, cv = cv)
-    descriptive_stats <-
-      dplyr::summarize_all(descriptive_stats, ~ round(., 2))
+    descriptive_stats <- round(descriptive_stats, 2)
     descriptive_stats <- as.list(descriptive_stats)
 
   } else {
@@ -703,8 +700,7 @@ get_descriptive_stats_gt <- function(
           cv = ~ cv(.)
         )
       )
-    descriptive_stats <-
-      dplyr::summarize_all(descriptive_stats, ~ round(., 2))
+    descriptive_stats <- round(descriptive_stats, 2)
     descriptive_stats <- as.list(descriptive_stats)
   }
 
@@ -746,11 +742,8 @@ get_common_values_gt <- function(
 
   n_rows <- get_table_total_rows(data = data_column)
 
-  common_values_tbl <- dplyr::group_by_at(data_column, 1)
-  common_values_tbl <- dplyr::count(common_values_tbl)
-  common_values_tbl <- dplyr::arrange(common_values_tbl, dplyr::desc(n))
+  common_values_tbl <- dplyr::count(data_column, dplyr::pick(1), sort = TRUE)
   common_values_tbl <- utils::head(common_values_tbl, 6E8)
-  common_values_tbl <- dplyr::ungroup(common_values_tbl)
 
   n_rows_common_values_tbl <-
     dplyr::pull(dplyr::count(common_values_tbl, name = "n", wt = n), n)
@@ -885,9 +878,7 @@ get_top_bottom_slice <- function(
 
   data_column_freq <-
     data_column %>%
-    dplyr::group_by_at(1) %>%
-    dplyr::count() %>%
-    dplyr::ungroup()
+    dplyr::count(dplyr::pick(1))
 
   name_1 <- rlang::sym(get_lsv("table_scan/tbl_lab_value")[[lang]])
   name_2 <- rlang::sym(get_lsv("table_scan/tbl_lab_count")[[lang]])

diff --git a/R/utils-profiling.R b/R/utils-profiling.R
@@ -50,8 +50,11 @@ get_table_total_missing_values <- function(data) {
 
   collected <-
     dplyr::collect(
-      dplyr::summarise_all(
-        data, ~ sum(ifelse(is.na(.), 1, 0), na.rm = TRUE)
+      dplyr::summarise(
+        data, dplyr::across(
+          dplyr::everything(),
+          function(x) sum(ifelse(is.na(x), 1, 0), na.rm = TRUE)
+        )
       )
     )
 
@@ -145,7 +148,7 @@ get_df_column_qtile_stats <- function(data_column) {
       )
     ) %>%
     dplyr::mutate(range = max - min) %>%
-    dplyr::summarize_all(~ round(., 2)) %>%
+    round(2) %>%
     as.list()
 }
 
@@ -294,7 +297,7 @@ get_dbi_column_qtile_stats <- function(data_column) {
       iqr = q_3 - q_1,
       range = max - min
     ) %>%
-    dplyr::summarize_all(~ round(., 2)) %>%
+    round(2) %>%
     as.list()
 }
 
@@ -330,8 +333,7 @@ get_table_column_histogram <- function(data_column, lang, locale) {
     data_column %>%
       dplyr::mutate_all(.funs = nchar) %>%
       dplyr::rename(nchar = 1) %>%
-      dplyr::group_by(nchar) %>%
-      dplyr::summarize(n = dplyr::n()) %>%
+      dplyr::count(nchar) %>%
       dplyr::collect() %>%
       dplyr::filter(!is.na(nchar)) %>%
       dplyr::mutate_all(.funs = as.numeric) %>%

diff --git a/scripts/generate_roadmap.R b/scripts/generate_roadmap.R
@@ -7,7 +7,7 @@ myrepo <- create_repo_ref("rstudio", "pointblank")
 issues <- get_issues(myrepo, state = "open")
 issues_df <- dplyr::as_tibble(parse_issues(issues))
 
-tbl <- 
+tbl <-
   issues_df %>%
   dplyr::filter(!is.na(milestone_title)) %>%
   dplyr::select(number, title, labels_name, milestone_title) %>%
@@ -32,14 +32,14 @@ tbl <-
   dplyr::select(
     number, title, milestone_title, major, minor, patch, type, difficulty, effort, priority
   ) %>%
-  dplyr::mutate(major = gsub("v", "", major)) %>% 
-  dplyr::mutate_at(.vars = vars(major, minor, patch), .funs = as.integer) %>%
+  dplyr::mutate(major = gsub("v", "", major)) %>%
+  dplyr::mutate(dplyr::across(c(major, minor, patch), .fns = as.integer)) %>%
   dplyr::mutate(difficulty = gsub(".*?([1-3]).*", "\\1", difficulty)) %>%
   dplyr::mutate(effort = gsub(".*?([1-3]).*", "\\1", effort)) %>%
   dplyr::mutate(priority = gsub(".*?([1-3]).*", "\\1", priority)) %>%
   dplyr::mutate(priority = ifelse(grepl("[^1-3]", priority), 4, priority)) %>%
   dplyr::mutate(type = gsub(".*?Type: (.*?)\\\"\\)", "\\1", type)) %>%
-  dplyr::mutate_at(.vars = vars(priority, difficulty, effort), .funs = as.numeric) %>%
+  dplyr::mutate(dplyr::across(c(priority, difficulty, effort), .fns = as.numeric)) %>%
   dplyr::arrange(
     major,
     minor,
@@ -51,7 +51,7 @@ tbl <-
   dplyr::mutate(number = paste0("#", number)) %>%
   dplyr::select(-c(major, minor, patch))
 
-gt_tbl <- 
+gt_tbl <-
   tbl %>%
   gt(
     rowname_col = "number",
@@ -109,13 +109,13 @@ gt_tbl <-
     locations = cells_body(columns = priority),
     fn = function(x) {
       ifelse(x == "4", "♨︎", x)
-    } 
+    }
   ) %>%
   text_transform(
     locations = cells_body(columns = priority),
     fn = function(x) {
       ifelse(x == "4", "♨︎", x)
-    } 
+    }
   ) %>%
   tab_style(
     style = "height: 50px",
@@ -175,8 +175,8 @@ svg_object <-
   gsub("style>", ">", ., fixed = TRUE) %>%
   gsub("<p>", "<p style='margin:0'>", ., fixed = TRUE) %>%
   gsub(
-    "; width: 0px\">", 
-"; width: 0px; 
+    "; width: 0px\">",
+"; width: 0px;
 @-webkit-keyframes AnimationName {0% {background-position:50% 0%} 50% {background-position:51% 100%} 100% {background-position:50% 0%}}
 @-moz-keyframes AnimationName {0% {background-position:50% 0%} 50% {background-position:51% 100%} 100% {background-position:50% 0%}}
 @-o-keyframes AnimationName {0% {background-position:50% 0%} 50% {background-position:51% 100%} 100% {background-position:50% 0%}}

diff --git a/tests/testthat/test-yaml_exec.R b/tests/testthat/test-yaml_exec.R
@@ -146,8 +146,8 @@ test_that("The `yaml_exec()` function effectively processes .yml files", {
   )
 
   # Expect that neither the agent nor the informant were saved
-  expect_equal(length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "agent.*?rds$"))), 0)
-  expect_equal(length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "informant.*?rds$"))), 0)
+  expect_length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "agent.*?rds$")), 0)
+  expect_length(fs::path_abs(fs::dir_ls(path = work_path, regexp = "informant.*?rds$")), 0)
 
   # Read just one of the YAML files (the agent) from the specified path,
   # write output to a path relative to the working directory; we will
@@ -171,7 +171,7 @@ test_that("The `yaml_exec()` function effectively processes .yml files", {
   # not all units passed, let's check for that as well) and no
   # table to be present (would be in `agent_1$tbl`)
   expect_false(all_passed(agent_1))
-  expect_equal(length(agent_1$extracts), 0)
+  expect_length(agent_1$extracts, 0)
   expect_null(agent_1$tbl)
 
   # Delete the written agent file