ready for CRAN, let Amith look once

talegari · Jun 27, 2024 · d14480a · d14480a
1 parent 97788de
commit d14480a
Show file tree

Hide file tree

Showing 96 changed files with 7,968 additions and 2,037 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,3 +6,4 @@
 docs
 ^README\.Rmd$
 ^\.github$
+^vignettes/articles$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: tidyrules
 Type: Package
 Title: Utilities to Retrieve Rulelists from Model Fits, Filter, Prune, Reorder and Predict on unseen data
-Version: 0.2.6
+Version: 0.2.7
 Authors@R: c(
     person("Srikanth", "Komala Sheshachala", email = "[email protected]", role = c("aut", "cre")),
     person("Amith Kumar", "Ullur Raghavendra", email = "[email protected]", role = c("aut"))
@@ -24,6 +24,7 @@ Imports:
     glue (>= 1.7.0),
     pheatmap (>= 1.0.12),
     proxy (>= 0.4.27),
+    tibble (>= 3.2.1),
 Suggests:
     AmesHousing (>= 0.0.3),
     dplyr (>= 0.8),
@@ -35,16 +36,14 @@ Suggests:
     testthat (>= 2.0.1),
     MASS (>= 7.3.50),
     mlbench (>= 2.1.1),
-    knitr (>= 1.23), 
     rmarkdown (>= 1.13),
     palmerpenguins (>= 0.1.1),
 Description: Provides a framework to work with decision rules. Rules can be extracted from supported models, augmented with (custom) metrics using validation data, manipulated using standard dataframe operations, reordered and pruned based on a metric, and used to predict on unseen (test) data. Utilities include creating a rulelist manually, exporting a rulelist as a SQL case statement and so on. The package offers two classes, rulelist and ruleset, both based on dataframe.
-URL: https://github.com/talegari/tidyrules
+URL: https://github.com/talegari/tidyrules, https://talegari.github.io/tidyrules/
 BugReports: https://github.com/talegari/tidyrules/issues
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.3.1
-VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE)
 
diff --git a/NAMESPACE b/NAMESPACE
@@ -40,6 +40,7 @@ importFrom(magrittr,"%>%")
 importFrom(rlang,"%||%")
 importFrom(stats,IQR)
 importFrom(stats,predict)
+importFrom(stats,reorder)
 importFrom(stats,runif)
 importFrom(stats,weighted.mean)
 importFrom(tidytable,across)
@@ -64,6 +65,7 @@ importFrom(tidytable,select)
 importFrom(tidytable,slice)
 importFrom(tidytable,summarise)
 importFrom(tidytable,unnest)
+importFrom(utils,capture.output)
 importFrom(utils,data)
 importFrom(utils,head)
 importFrom(utils,tail)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# tidyrules 0.2.7
+
+- Major rewrite of tidyrules
+  - rulelist class introduced with many methods, mainly `predict`
+  - breaking change: `tidyRules` function no longer exists!
+  - Support added for `party` models
+
 # tidyrules 0.1.5
 
 - Maintenance release (replace package rsample with modeldata)

diff --git a/R/dev_mindmap.R b/R/dev_mindmap.R
diff --git a/R/package.R b/R/package.R
@@ -51,6 +51,7 @@
 #' @importFrom stats runif
 #' @importFrom utils head
 #' @importFrom utils tail
+#' @importFrom utils capture.output
 #'
 "_PACKAGE"
 

diff --git a/R/rulelist.R b/R/rulelist.R
@@ -343,8 +343,12 @@ set_validation_data = function(x, validation_data, y_name, weight = 1){
 
     res = rlang::duplicate(x)
 
-    checkmate::assert_data_frame(validation_data)
-    attr(res, "validation_data") = data.table::as.data.table(validation_data)
+    checkmate::assert_data_frame(validation_data, null.ok = TRUE)
+    if (!is.null(validation_data)) {
+      attr(res, "validation_data") =
+        data.table::as.data.table(validation_data)
+    }
+
     attr(res, "y_name") = y_name
     attr(res, "weight") = weight
 
@@ -376,48 +380,86 @@ print.rulelist = function(x, banner = TRUE, ...){
   model_type      = attr(rulelist, "model_type")
   validation_data = attr(rulelist, "validation_data")
 
+  text = character(0)
   if (banner) {
-    cli::cli_rule(left = "Rulelist")
-    cli::cli_text("")
+    text = c(text, "---- Rulelist --------------------------------")
   }
 
   if (is.null(keys)) {
-    cli::cli_alert_info("{.emph Keys}: {.strong NULL}")
+    text = c(text,
+             paste(cli::symbol$play,
+                   "Keys: NULL"
+                   )
+             )
   } else {
-    cli::cli_alert_info("{.emph keys}: {.val {keys}}")
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Keys: {keys}")
+                   )
+             )
     n_combo = nrow(distinct(select(x, all_of(keys))))
-    cli::cli_alert_info("{.emph Number of distinct keys}: {.val {n_combo}}")
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Number of distinct keys: {n_combo}")
+                   )
+             )
   }
 
-  cli::cli_alert_info("{.emph Number of rules}: {.val {nrow(x)}}")
+  text = c(text,
+           paste(cli::symbol$play,
+                 stringr::str_glue("Number of rules: {nrow(x)}")
+                 )
+           )
 
   if (is.null(model_type)){
-    cli::cli_alert_info("{.emph Model type}: {.strong NULL}")
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Model Type: NULL")
+                   )
+             )
   } else {
-    cli::cli_alert_info("{.emph Model type}: {.val {model_type}}")
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Model type: {model_type}")
+                   )
+             )
   }
 
-  if (is.null(estimation_type)){
-    cli::cli_alert_info("{.emph Estimation type}: {.strong NULL}")
+  if (is.null(estimation_type)) {
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Estimation type: NULL")
+                   )
+             )
   } else {
-    cli::cli_alert_info("{.emph Estimation type}: {.val {estimation_type}}")
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Estimation type: {estimation_type}")
+                   )
+             )
   }
 
-  if (is.null(validation_data)){
-    cli::cli_alert_warning("{.emph Is validation data set}: {.strong FALSE}")
+  if (is.null(validation_data)) {
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Is validation data set: FALSE")
+                   )
+             )
   } else {
-    cli::cli_alert_success("{.emph Is validation data set}: {.strong TRUE}")
+    text = c(text,
+             paste(cli::symbol$play,
+                   stringr::str_glue("Is validation data set: TRUE")
+                   )
+             )
   }
 
-  cli::cli_text("")
-
-  class(rulelist) = setdiff(class(rulelist), "rulelist")
-  # now 'rulelist' is a dataframe and not a 'rulelist'
-  print(rulelist, ...)
+  print_output = capture.output(print(tibble::as_tibble(x), ...), file = NULL)
+  text = c(text, "\n", utils::tail(print_output, -1))
 
   if (banner) {
-    cli::cli_rule()
+    text = c(text, "----------------------------------------------")
   }
+  cat(paste(text, collapse = "\n"))
 
   return(invisible(x))
 }
@@ -592,7 +634,7 @@ predict_all_rulelist = function(rulelist, new_data){
     res =
       rulelist %>%
       as.data.frame() %>%
-      nest(data__ = tidytable::everything(), .by = keys) %>%
+      nest(data__ = tidytable::everything(), .by = all_of(keys)) %>%
       mutate(rn_df__ =
                purrr::map(data__,
                           ~ predict_all_nokeys_rulelist(.x, new_data)
@@ -603,7 +645,7 @@ predict_all_rulelist = function(rulelist, new_data){
       drop_na(row_nbr) %>%
       select(all_of(c("row_nbr", keys, "rule_nbr"))) %>%
       arrange(!!!rlang::syms(c("row_nbr", keys, "rule_nbr"))) %>%
-      nest(.by = c("row_nbr", keys), .key = "rule_nbr") %>%
+      nest(.by = all_of(c("row_nbr", keys)), .key = "rule_nbr") %>%
       mutate(rule_nbr = purrr::map(rule_nbr, ~ .x[[1]]))
   }
 
@@ -685,7 +727,7 @@ predict_rulelist = function(rulelist, new_data){
     res =
       rulelist %>%
       as.data.frame() %>%
-      nest(data__ = tidytable::everything(), .by = keys) %>%
+      nest(data__ = tidytable::everything(), .by = all_of(keys)) %>%
       mutate(rn_df__ =
                purrr::map(data__, ~ predict_nokeys_rulelist(.x, new_data))
              ) %>%
@@ -1759,10 +1801,9 @@ plot.prune_rulelist = function(x, ...) {
 #' @seealso [rulelist], [tidy], [augment][augment.rulelist],
 #'   [predict][predict.rulelist], [calculate][calculate.rulelist],
 #'   [prune][prune.rulelist], [reorder][reorder.rulelist]
+#' @importFrom stats reorder
 #' @export
-reorder = function(x, ...){
-  UseMethod("reorder", x)
-}
+stats::reorder
 
 #' @name reorder.rulelist
 #' @title Reorder the rules/rows of a [rulelist]
@@ -1891,7 +1932,7 @@ reorder.rulelist = function(x,
     rule_metrics           = purrr::map_dfr(splitted, wrapper_metric_fun)
     ord                    = do.call(base::order,
                                      c(rule_metrics,
-                                       list(decreasing = minimize)
+                                       list(decreasing = !minimize)
                                        )
                                      )
     pos                    = which(ord == 1)

diff --git a/R/ruleset.R b/R/ruleset.R
@@ -53,18 +53,22 @@ print.ruleset = function(x, banner = TRUE, ...){
 
   ruleset = rlang::duplicate(x)
 
+  text = character(0)
   if (banner) {
-    cli::cli_rule(left = "Ruleset")
-    cli::cli_text("")
+    text = c(text, "---- Ruleset -------------------------------")
   }
 
   class(ruleset) = setdiff(class(ruleset), "ruleset")
-  # now 'ruleset' is a rulelist
-  print(ruleset, banner = FALSE, ...)
+  text = c(text,
+           capture.output(print(ruleset, banner = FALSE, ...),
+                          file = NULL
+                          )
+           )
 
   if (banner) {
-    cli::cli_rule()
+    text = c(text, "--------------------------------------------")
   }
+  cat(paste(text, collapse = "\n"))
 
   return(invisible(x))
 }

diff --git a/R/utils.R b/R/utils.R
@@ -312,12 +312,14 @@ convert_rule_flavor = function(rule, flavor){
   } else if (flavor == "sql"){
     res =
       rule %>%
+      stringr::str_replace_all("==", "=") %>%
+
       stringr::str_replace_all("\\( ", "") %>%
       stringr::str_replace_all(" \\)", "") %>%
 
       stringr::str_replace_all("%in%", "IN") %>%
-      stringr::str_replace_all("c\\(", "[") %>%
-      stringr::str_replace_all("\\)", "]") %>%
+      stringr::str_replace_all("c\\(", "(") %>%
+      stringr::str_replace_all("\\)", ")") %>%
 
       stringr::str_replace_all("&", " ) AND (") %>%
 

diff --git a/README.Rmd b/README.Rmd
@@ -11,6 +11,7 @@ knitr::opts_chunk$set(
   fig.path = "man/figures/README-",
   out.width = "100%"
 )
+devtools::load_all() #todo
 ```
 # tidyrules
 
@@ -20,36 +21,39 @@ knitr::opts_chunk$set(
 [![R-CMD-check](https://github.com/talegari/tidyrules/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/talegari/tidyrules/actions/workflows/R-CMD-check.yaml)
 <!-- badges: end -->
 
-`tidyrules` converts textual rules from models to dataframes with parseable rules. Supported models are: `C5`, `cubist` and `rpart`.
+> [tidyrules](https://cran.r-project.org/package=tidyrules) [R](https://www.r-project.org/) [package](https://cran.r-project.org/) provides a framework to work with decision rules. Rules can be extracted from supported models, augmented with (custom) metrics using validation data, manipulated using standard dataframe operations, reordered and pruned based on a metric, and used to predict on unseen (test) data. Utilities include creating a rulelist manually, exporting a rulelist as a SQL case statement and so on. The package offers two classes, rulelist and ruleset, both based on dataframe.
+
+![](man/figures/tidyrules_schematic.png)
 
 ## Example
+<details>
+<summary>expand/collapse</summary>
 
 ```{r example}
 library(tidyrules)
 ```
 
 ```{r basic C5 example}
 model_c5 = C50::C5.0(Species ~ ., data = iris, rules = TRUE)
-summary(model_c5)
-```
-
-Tidy the rules:
-
-```{r tidyrules}
-pander::pandoc.table(tidyRules(model_c5), split.tables = 120)
+pander::pandoc.table(tidy(model_c5), split.tables = 120)
 ```
+</details>
 
 ## Installation
+<details>
+<summary>expand/collapse</summary>
 
 You can install the released version of tidyrules from [CRAN](https://CRAN.R-project.org) with:
 
-``` r
+```{r, eval = FALSE}
 install.packages("tidyrules")
 ```
 
 And the development version from [GitHub](https://github.com/) with:
 
-``` r
+```{r, eval = FALSE}
 # install.packages("devtools")
 devtools::install_github("talegari/tidyrules")
 ```
+</details>
+