diff --git a/.DS_Store b/.DS_Store index c2579a6..fbc356d 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.Rbuildignore b/.Rbuildignore index e840dee..c75e4c9 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,4 @@ docs ^README\.Rmd$ ^\.github$ +^vignettes/articles$ diff --git a/DESCRIPTION b/DESCRIPTION index 7f38cc5..f3623e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidyrules Type: Package Title: Utilities to Retrieve Rulelists from Model Fits, Filter, Prune, Reorder and Predict on unseen data -Version: 0.2.6 +Version: 0.2.7 Authors@R: c( person("Srikanth", "Komala Sheshachala", email = "sri.teach@gmail.com", role = c("aut", "cre")), person("Amith Kumar", "Ullur Raghavendra", email = "amith54@gmail.com", role = c("aut")) @@ -24,6 +24,7 @@ Imports: glue (>= 1.7.0), pheatmap (>= 1.0.12), proxy (>= 0.4.27), + tibble (>= 3.2.1), Suggests: AmesHousing (>= 0.0.3), dplyr (>= 0.8), @@ -35,16 +36,14 @@ Suggests: testthat (>= 2.0.1), MASS (>= 7.3.50), mlbench (>= 2.1.1), - knitr (>= 1.23), rmarkdown (>= 1.13), palmerpenguins (>= 0.1.1), Description: Provides a framework to work with decision rules. Rules can be extracted from supported models, augmented with (custom) metrics using validation data, manipulated using standard dataframe operations, reordered and pruned based on a metric, predict on unseen (test) data. Utilities include; Creating a rulelist manually, Exporting a rulelist as a SQL case statement and so on. The package offers two classes; rulelist and rulelset based on dataframe. 
-URL: https://github.com/talegari/tidyrules +URL: https://github.com/talegari/tidyrules, https://talegari.github.io/tidyrules/ BugReports: https://github.com/talegari/tidyrules/issues License: GPL-3 Encoding: UTF-8 LazyData: true RoxygenNote: 7.3.1 -VignetteBuilder: knitr Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index b39b6d1..93f001d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ importFrom(magrittr,"%>%") importFrom(rlang,"%||%") importFrom(stats,IQR) importFrom(stats,predict) +importFrom(stats,reorder) importFrom(stats,runif) importFrom(stats,weighted.mean) importFrom(tidytable,across) @@ -64,6 +65,7 @@ importFrom(tidytable,select) importFrom(tidytable,slice) importFrom(tidytable,summarise) importFrom(tidytable,unnest) +importFrom(utils,capture.output) importFrom(utils,data) importFrom(utils,head) importFrom(utils,tail) diff --git a/NEWS.md b/NEWS.md index 35b105b..582981b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# tidyrules 0.2.7 + +- Major rewrite of tidyrules + - rulelist class introduced with many methods, mainly `predict` + - breaking change: `tidyRules` function no longer exists! + - Support added to `party` models + # tidyrules 0.1.5 - Maintenance release (replace package rsample with modeldata) diff --git a/R/dev_mindmap.R b/R/dev_mindmap.R deleted file mode 100644 index e710b6f..0000000 --- a/R/dev_mindmap.R +++ /dev/null @@ -1,44 +0,0 @@ -################################################################################ -# This is the part of the 'tidyrules' R package hosted at -# https://github.com/talegari/tidyrules with GPL-3 license. -################################################################################ - -# Structure -# -# Model/fitted object to rules should happens via 'tidy' call -# We get the generic from generics::tidy -# Rules object will be one among: ruleset/rulelist. -# This is a wrapper over tidytable/dataframe. 
-# -# Methods for rulelist/set: print, predict, augment -# At high level, predict returns the rule_nbr for a row_nbr in new_data -# At high level, augment (TODO) returns some metrics on new_data as new column -# -# Models: -# -# C5 -# - (rulelist when fitted with rules = TRUE) -- implemented -# - (ruleset when fitted with rules = FALSE) -- NOT implemented -# -# rpart -# - (ruleset with classification aka class) -- implemented -# - (ruleset with regression aka anova) -- implemented -# - (ruleset with poisson) -- NOT implemented -# - (ruleset with survival) -- NOT implemented -# - (ruleset with exp) -- NOT implemented -# - (ruleset with used defined split) -- NOT implemented -# -# party -# - (ruleset with classification) -- NOT implemented -# - (ruleset with regression) -- NOT implemented -# - (ruleset with survival) -- NOT implemented -# - (ruleset with used defined split) -- NOT implemented -# -# cubist -# - (ruleset with regression) -- implemented -# -# ranger -# - (rulelist) -- NOT implemented -# -# sirus -# - (ruleset ??) 
-- NOT implemented \ No newline at end of file diff --git a/R/package.R b/R/package.R index 2ba6858..caacc5e 100644 --- a/R/package.R +++ b/R/package.R @@ -51,6 +51,7 @@ #' @importFrom stats runif #' @importFrom utils head #' @importFrom utils tail +#' @importFrom utils capture.output #' "_PACKAGE" diff --git a/R/rulelist.R b/R/rulelist.R index 141f203..02486e8 100644 --- a/R/rulelist.R +++ b/R/rulelist.R @@ -343,8 +343,12 @@ set_validation_data = function(x, validation_data, y_name, weight = 1){ res = rlang::duplicate(x) - checkmate::assert_data_frame(validation_data) - attr(res, "validation_data") = data.table::as.data.table(validation_data) + checkmate::assert_data_frame(validation_data, null.ok = TRUE) + if (!is.null(validation_data)) { + attr(res, "validation_data") = + data.table::as.data.table(validation_data) + } + attr(res, "y_name") = y_name attr(res, "weight") = weight @@ -376,48 +380,86 @@ print.rulelist = function(x, banner = TRUE, ...){ model_type = attr(rulelist, "model_type") validation_data = attr(rulelist, "validation_data") + text = character(0) if (banner) { - cli::cli_rule(left = "Rulelist") - cli::cli_text("") + text = c(text, "---- Rulelist --------------------------------") } if (is.null(keys)) { - cli::cli_alert_info("{.emph Keys}: {.strong NULL}") + text = c(text, + paste(cli::symbol$play, + "Keys: NULL" + ) + ) } else { - cli::cli_alert_info("{.emph keys}: {.val {keys}}") + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Keys: {keys}") + ) + ) n_combo = nrow(distinct(select(x, all_of(keys)))) - cli::cli_alert_info("{.emph Number of distinct keys}: {.val {n_combo}}") + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Number of distinct keys: {n_combo}") + ) + ) } - cli::cli_alert_info("{.emph Number of rules}: {.val {nrow(x)}}") + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Number of rules: {nrow(x)}") + ) + ) if (is.null(model_type)){ - cli::cli_alert_info("{.emph Model type}: {.strong NULL}") 
+ text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Model Type: NULL") + ) + ) } else { - cli::cli_alert_info("{.emph Model type}: {.val {model_type}}") + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Model type: {model_type}") + ) + ) } - if (is.null(estimation_type)){ - cli::cli_alert_info("{.emph Estimation type}: {.strong NULL}") + if (is.null(estimation_type)) { + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Estimation type: NULL") + ) + ) } else { - cli::cli_alert_info("{.emph Estimation type}: {.val {estimation_type}}") + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Estimation type: {estimation_type}") + ) + ) } - if (is.null(validation_data)){ - cli::cli_alert_warning("{.emph Is validation data set}: {.strong FALSE}") + if (is.null(validation_data)) { + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Is validation data set: FALSE") + ) + ) } else { - cli::cli_alert_success("{.emph Is validation data set}: {.strong TRUE}") + text = c(text, + paste(cli::symbol$play, + stringr::str_glue("Is validation data set: TRUE") + ) + ) } - cli::cli_text("") - - class(rulelist) = setdiff(class(rulelist), "rulelist") - # now 'rulelist' is a dataframe and not a 'rulelist' - print(rulelist, ...) 
+ print_output = capture.output(print(tibble::as_tibble(x), ...), file = NULL) + text = c(text, "\n", utils::tail(print_output, -1)) if (banner) { - cli::cli_rule() + text = c(text, "----------------------------------------------") } + cat(paste(text, collapse = "\n")) return(invisible(x)) } @@ -592,7 +634,7 @@ predict_all_rulelist = function(rulelist, new_data){ res = rulelist %>% as.data.frame() %>% - nest(data__ = tidytable::everything(), .by = keys) %>% + nest(data__ = tidytable::everything(), .by = all_of(keys)) %>% mutate(rn_df__ = purrr::map(data__, ~ predict_all_nokeys_rulelist(.x, new_data) @@ -603,7 +645,7 @@ predict_all_rulelist = function(rulelist, new_data){ drop_na(row_nbr) %>% select(all_of(c("row_nbr", keys, "rule_nbr"))) %>% arrange(!!!rlang::syms(c("row_nbr", keys, "rule_nbr"))) %>% - nest(.by = c("row_nbr", keys), .key = "rule_nbr") %>% + nest(.by = all_of(c("row_nbr", keys)), .key = "rule_nbr") %>% mutate(rule_nbr = purrr::map(rule_nbr, ~ .x[[1]])) } @@ -685,7 +727,7 @@ predict_rulelist = function(rulelist, new_data){ res = rulelist %>% as.data.frame() %>% - nest(data__ = tidytable::everything(), .by = keys) %>% + nest(data__ = tidytable::everything(), .by = all_of(keys)) %>% mutate(rn_df__ = purrr::map(data__, ~ predict_nokeys_rulelist(.x, new_data)) ) %>% @@ -1759,10 +1801,9 @@ plot.prune_rulelist = function(x, ...) 
{ #' @seealso [rulelist], [tidy], [augment][augment.rulelist], #' [predict][predict.rulelist], [calculate][calculate.rulelist], #' [prune][prune.rulelist], [reorder][reorder.rulelist] +#' @importFrom stats reorder #' @export -reorder = function(x, ...){ - UseMethod("reorder", x) -} +stats::reorder #' @name reorder.rulelist #' @title Reorder the rules/rows of a [rulelist] @@ -1891,7 +1932,7 @@ reorder.rulelist = function(x, rule_metrics = purrr::map_dfr(splitted, wrapper_metric_fun) ord = do.call(base::order, c(rule_metrics, - list(decreasing = minimize) + list(decreasing = !minimize) ) ) pos = which(ord == 1) diff --git a/R/ruleset.R b/R/ruleset.R index c5f2097..cbfa73b 100644 --- a/R/ruleset.R +++ b/R/ruleset.R @@ -53,18 +53,22 @@ print.ruleset = function(x, banner = TRUE, ...){ ruleset = rlang::duplicate(x) + text = character(0) if (banner) { - cli::cli_rule(left = "Ruleset") - cli::cli_text("") + text = c(text, "---- Ruleset -------------------------------") } class(ruleset) = setdiff(class(ruleset), "ruleset") - # now 'ruleset' is a rulelist - print(ruleset, banner = FALSE, ...) 
+ text = c(text, + capture.output(print(ruleset, banner = FALSE, ...), + file = NULL + ) + ) if (banner) { - cli::cli_rule() + text = c(text, "--------------------------------------------") } + cat(paste(text, collapse = "\n")) return(invisible(x)) } diff --git a/R/utils.R b/R/utils.R index f48474b..3dc0919 100644 --- a/R/utils.R +++ b/R/utils.R @@ -312,12 +312,14 @@ convert_rule_flavor = function(rule, flavor){ } else if (flavor == "sql"){ res = rule %>% + stringr::str_replace_all("==", "=") %>% + stringr::str_replace_all("\\( ", "") %>% stringr::str_replace_all(" \\)", "") %>% stringr::str_replace_all("%in%", "IN") %>% - stringr::str_replace_all("c\\(", "[") %>% - stringr::str_replace_all("\\)", "]") %>% + stringr::str_replace_all("c\\(", "(") %>% + stringr::str_replace_all("\\)", ")") %>% stringr::str_replace_all("&", " ) AND (") %>% diff --git a/README.Rmd b/README.Rmd index 9e80aca..a04421d 100644 --- a/README.Rmd +++ b/README.Rmd @@ -11,6 +11,7 @@ knitr::opts_chunk$set( fig.path = "man/figures/README-", out.width = "100%" ) +devtools::load_all() #todo ``` # tidyrules @@ -20,9 +21,13 @@ knitr::opts_chunk$set( [![R-CMD-check](https://github.com/talegari/tidyrules/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/talegari/tidyrules/actions/workflows/R-CMD-check.yaml) -`tidyrules` converts textual rules from models to dataframes with parseable rules. Supported models are: `C5`, `cubist` and `rpart`. +> [tidyrules](https://cran.r-project.org/package=tidyrules) [R](https://www.r-project.org/) [package](https://cran.r-project.org/) provides a framework to work with decision rules. Rules can be extracted from supported models, augmented with (custom) metrics using validation data, manipulated using standard dataframe operations, reordered and pruned based on a metric, predict on unseen (test) data. Utilities include; Creating a rulelist manually, Exporting a rulelist as a SQL case statement and so on. 
The package offers two classes; rulelist and ruleset based on dataframe. + +![](man/figures/tidyrules_schematic.png) ## Example +
+expand/collapse ```{r example} library(tidyrules) @@ -30,26 +35,25 @@ library(tidyrules) ```{r basic C5 example} model_c5 = C50::C5.0(Species ~ ., data = iris, rules = TRUE) -summary(model_c5) -``` - -Tidy the rules: - -```{r tidyrules} -pander::pandoc.table(tidyRules(model_c5), split.tables = 120) +pander::pandoc.table(tidy(model_c5), split.tables = 120) ``` +
## Installation +
+expand/collapse You can install the released version of tidyrules from [CRAN](https://CRAN.R-project.org) with: -``` r +```{r, eval = FALSE} install.packages("tidyrules") ``` And the development version from [GitHub](https://github.com/) with: -``` r +```{r, eval = FALSE} # install.packages("devtools") devtools::install_github("talegari/tidyrules") ``` +
+ diff --git a/README.md b/README.md index 8a4d5db..d2cceb3 100644 --- a/README.md +++ b/README.md @@ -7,103 +7,61 @@ [![Build Status](https://travis-ci.org/talegari/tidyrules.svg?branch=master)](https://travis-ci.org/talegari/tidyrules) -[![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/tidyrules)](https://cran.r-project.org/package=tidyrules) +[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/tidyrules)](https://cran.r-project.org/package=tidyrules) +[![R-CMD-check](https://github.com/talegari/tidyrules/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/talegari/tidyrules/actions/workflows/R-CMD-check.yaml) -`tidyrules` converts texual rules from models to dataframes with -parseable rules. Supported models are: `C5`, `cubist` and `rpart`. +> [tidyrules](https://cran.r-project.org/package=tidyrules) +> [R](https://www.r-project.org/) [package](https://cran.r-project.org/) +> provides a framework to work with decision rules. Rules can be +> extracted from supported models, augmented with (custom) metrics using +> validation data, manipulated using standard dataframe operations, +> reordered and pruned based on a metric, predict on unseen (test) data. +> Utilities include; Creating a rulelist manually, Exporting a rulelist +> as a SQL case statement and so on. The package offers two classes; +> rulelist and rulelset based on dataframe. + +![](man/figures/tidyrules_schematic.png) ## Example +
+ +expand/collapse + + ``` r library(tidyrules) ``` ``` r model_c5 = C50::C5.0(Species ~ ., data = iris, rules = TRUE) -summary(model_c5) -#> -#> Call: -#> C5.0.formula(formula = Species ~ ., data = iris, rules = TRUE) -#> -#> -#> C5.0 [Release 2.07 GPL Edition] Tue Dec 10 14:47:18 2019 -#> ------------------------------- -#> -#> Class specified by attribute `outcome' -#> -#> Read 150 cases (5 attributes) from undefined.data -#> -#> Rules: -#> -#> Rule 1: (50, lift 2.9) -#> Petal.Length <= 1.9 -#> -> class setosa [0.981] -#> -#> Rule 2: (48/1, lift 2.9) -#> Petal.Length > 1.9 -#> Petal.Length <= 4.9 -#> Petal.Width <= 1.7 -#> -> class versicolor [0.960] -#> -#> Rule 3: (46/1, lift 2.9) -#> Petal.Width > 1.7 -#> -> class virginica [0.958] -#> -#> Rule 4: (46/2, lift 2.8) -#> Petal.Length > 4.9 -#> -> class virginica [0.938] -#> -#> Default class: setosa -#> -#> -#> Evaluation on training data (150 cases): +pander::pandoc.table(tidy(model_c5), split.tables = 120) #> -#> Rules -#> ---------------- -#> No Errors +#> ---------------------------------------------------------------------------------------------- +#> rule_nbr trial_nbr LHS RHS support confidence lift +#> ---------- ----------- ---------------------------- ------------ --------- ------------ ------ +#> 1 1 ( Petal.Length <= 1.9 ) setosa 50 0.9808 2.9 #> -#> 4 4( 2.7%) << +#> 2 1 ( Petal.Length > 1.9 ) & ( versicolor 48 0.96 2.9 +#> Petal.Length <= 4.9 ) & ( +#> Petal.Width <= 1.7 ) #> +#> 3 1 ( Petal.Width > 1.7 ) virginica 46 0.9583 2.9 #> -#> (a) (b) (c) <-classified as -#> ---- ---- ---- -#> 50 (a): class setosa -#> 47 3 (b): class versicolor -#> 1 49 (c): class virginica -#> -#> -#> Attribute usage: -#> -#> 96.00% Petal.Length -#> 62.67% Petal.Width -#> -#> -#> Time: 0.0 secs +#> 4 1 ( Petal.Length > 4.9 ) virginica 46 0.9375 2.8 +#> ---------------------------------------------------------------------------------------------- ``` -Tidy the rules: - -``` r -pander::pandoc.table(tidyRules(model_c5), 
split.tables = 120) -#> -#> ---------------------------------------------------------------------------------------------------- -#> id LHS RHS support confidence lift rule_number trial_number -#> ---- ----------------------- ------------ --------- ------------ ------ ------------- -------------- -#> 1 Petal.Length <= 1.9 setosa 50 0.9808 2.9 1 1 -#> -#> 2 Petal.Length > 1.9 & versicolor 48 0.96 2.9 2 1 -#> Petal.Length <= 4.9 & -#> Petal.Width <= 1.7 -#> -#> 3 Petal.Width > 1.7 virginica 46 0.9583 2.9 3 1 -#> -#> 4 Petal.Length > 4.9 virginica 46 0.9375 2.8 4 1 -#> ---------------------------------------------------------------------------------------------------- -``` +
## Installation +
+ +expand/collapse + + You can install the released version of tidyrules from [CRAN](https://CRAN.R-project.org) with: @@ -117,3 +75,5 @@ And the development version from [GitHub](https://github.com/) with: # install.packages("devtools") devtools::install_github("talegari/tidyrules") ``` + +
diff --git a/docs/404.html b/docs/404.html index ec0611c..11667e4 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - - - + + + + - Page not found (404) • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - - - - -
-
- + +
+ + + - - -
+
+
-
+ + - - diff --git a/docs/articles/index.html b/docs/articles/index.html index bf49217..91b2bf6 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,66 +1,12 @@ - - - - - - - -Articles • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Articles • tidyrules - + + - - - -
-
- -
- -
+
+
Using tidyrules
+
+
-
- - + + diff --git a/docs/articles/tidyrules_schematic.png b/docs/articles/tidyrules_schematic.png new file mode 100644 index 0000000..35eaed1 Binary files /dev/null and b/docs/articles/tidyrules_schematic.png differ diff --git a/docs/articles/using_tidyrules.html b/docs/articles/using_tidyrules.html new file mode 100644 index 0000000..9491d23 --- /dev/null +++ b/docs/articles/using_tidyrules.html @@ -0,0 +1,680 @@ + + + + + + + +Using tidyrules • tidyrules + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+

Abstract +

+
+

tidyrules +R package provides a framework to +work with decision rules. Rules can be extracted from supported models, +augmented with (custom) metrics using validation data, manipulated using +standard dataframe operations, reordered and pruned based on a metric, +predict on unseen (test) data. Utilities include; Creating a rulelist +manually, Exporting a rulelist as a SQL case statement and so on. The +package offers two classes; rulelist and ruleset based on +dataframe.

+
+
+
+

Schematic +

+

+
+
+

About +

+

This document provides a working example of a classification problem +where the functionality of the package is showcased. We use the +modeldata::attrition dataset where Attrition +column is the binary dependent variable.

+ +
+att = modeldata::attrition
+set.seed(1)
+valid_index = sample(c(TRUE, FALSE), nrow(att), replace = TRUE)
+att_train = att[!valid_index, ] # nrow: 742
+att_valid = att[valid_index, ]  # nrow: 728
+glimpse(att)
+
## Rows: 1,470
+## Columns: 31
+## $ Age                      <int> 41, 49, 37, 33, 27, 32, 59, 30, 38, 36, 35, 2…
+## $ Attrition                <fct> Yes, No, Yes, No, No, No, No, No, No, No, No,…
+## $ BusinessTravel           <fct> Travel_Rarely, Travel_Frequently, Travel_Rare…
+## $ DailyRate                <int> 1102, 279, 1373, 1392, 591, 1005, 1324, 1358,…
+## $ Department               <fct> Sales, Research_Development, Research_Develop…
+## $ DistanceFromHome         <int> 1, 8, 2, 3, 2, 2, 3, 24, 23, 27, 16, 15, 26, …
+## $ Education                <ord> College, Below_College, College, Master, Belo…
+## $ EducationField           <fct> Life_Sciences, Life_Sciences, Other, Life_Sci…
+## $ EnvironmentSatisfaction  <ord> Medium, High, Very_High, Very_High, Low, Very…
+## $ Gender                   <fct> Female, Male, Male, Female, Male, Male, Femal…
+## $ HourlyRate               <int> 94, 61, 92, 56, 40, 79, 81, 67, 44, 94, 84, 4…
+## $ JobInvolvement           <ord> High, Medium, Medium, High, High, High, Very_…
+## $ JobLevel                 <int> 2, 2, 1, 1, 1, 1, 1, 1, 3, 2, 1, 2, 1, 1, 1, …
+## $ JobRole                  <fct> Sales_Executive, Research_Scientist, Laborato…
+## $ JobSatisfaction          <ord> Very_High, Medium, High, High, Medium, Very_H…
+## $ MaritalStatus            <fct> Single, Married, Single, Married, Married, Si…
+## $ MonthlyIncome            <int> 5993, 5130, 2090, 2909, 3468, 3068, 2670, 269…
+## $ MonthlyRate              <int> 19479, 24907, 2396, 23159, 16632, 11864, 9964…
+## $ NumCompaniesWorked       <int> 8, 1, 6, 1, 9, 0, 4, 1, 0, 6, 0, 0, 1, 0, 5, …
+## $ OverTime                 <fct> Yes, No, Yes, Yes, No, No, Yes, No, No, No, N…
+## $ PercentSalaryHike        <int> 11, 23, 15, 11, 12, 13, 20, 22, 21, 13, 13, 1…
+## $ PerformanceRating        <ord> Excellent, Outstanding, Excellent, Excellent,…
+## $ RelationshipSatisfaction <ord> Low, Very_High, Medium, High, Very_High, High…
+## $ StockOptionLevel         <int> 0, 1, 0, 0, 1, 0, 3, 1, 0, 2, 1, 0, 1, 1, 0, …
+## $ TotalWorkingYears        <int> 8, 10, 7, 8, 6, 8, 12, 1, 10, 17, 6, 10, 5, 3…
+## $ TrainingTimesLastYear    <int> 0, 3, 3, 3, 3, 2, 3, 2, 2, 3, 5, 3, 1, 2, 4, …
+## $ WorkLifeBalance          <ord> Bad, Better, Better, Better, Better, Good, Go…
+## $ YearsAtCompany           <int> 6, 10, 0, 8, 2, 7, 1, 1, 9, 7, 5, 9, 5, 2, 4,…
+## $ YearsInCurrentRole       <int> 4, 7, 0, 7, 2, 7, 0, 0, 7, 7, 4, 5, 2, 2, 2, …
+## $ YearsSinceLastPromotion  <int> 0, 1, 0, 3, 2, 3, 0, 0, 1, 7, 0, 0, 4, 1, 0, …
+## $ YearsWithCurrManager     <int> 5, 7, 0, 0, 2, 6, 0, 0, 8, 7, 3, 8, 3, 2, 3, …
+
+
+

Tidy +

+

tidy generic creates rulelist from a +supported model fit. rulelist class is fundamental data +structure which offers many methods such as predict, +augment and so on. A rulelist is a dataframe +with some extra attributes. The order of rows of the dataframe defines +the order of preference of rules.

+

tidy supports these model fits:

+
    +
  • +C5 rule-based model (classification)
  • +
  • +rpart tree (classification / regression)
  • +
  • +party tree (classification / regression)
  • +
  • +cubist tree (regression)
  • +
+

Let's build a C5 model and then extract a rulelist:

+
+model_c5 = C50::C5.0(Attrition ~., data = att_train, rules = TRUE)
+model_c5
+
## 
+## Call:
+## C5.0.formula(formula = Attrition ~ ., data = att_train, rules = TRUE)
+## 
+## Rule-Based Model
+## Number of samples: 742 
+## Number of predictors: 30 
+## 
+## Number of Rules: 19 
+## 
+## Non-standard options: attempt to group attributes
+
+tidy_c5 = tidy(model_c5)
+tidy_c5
+## ---- Rulelist --------------------------------
+## ▶ Keys: trial_nbr
+## ▶ Number of distinct keys: 1
+## ▶ Number of rules: 19
+## ▶ Model type: C5
+## ▶ Estimation type: classification
+## ▶ Is validation data set: FALSE
+## 
+## 
+##    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+##       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+##  1        1         1 ( Age > 26 ) & ( Environme… No        189      0.963   1.2
+##  2        2         1 ( Age > 26 ) & ( Environme… No        244      0.951   1.1
+##  3        3         1 ( BusinessTravel == 'Non-T… No         74      0.947   1.1
+##  4        4         1 ( Age <= 31 ) & ( Educatio… Yes        12      0.929   5.4
+##  5        5         1 ( JobSatisfaction %in% c('… No        157      0.924   1.1
+##  6        6         1 ( Age > 26 ) & ( Environme… No        351      0.924   1.1
+##  7        7         1 ( EnvironmentSatisfaction … Yes         8      0.9     5.3
+##  8        8         1 ( OverTime == 'Yes' ) & ( … Yes         8      0.9     5.3
+##  9        9         1 ( BusinessTravel %in% c('T… Yes         8      0.9     5.3
+## 10       10         1 ( EnvironmentSatisfaction … Yes         7      0.889   5.2
+## 11       11         1 ( JobInvolvement == 'Low' … Yes         7      0.889   5.2
+## 12       12         1 ( OverTime == 'No' )        No        516      0.888   1.1
+## 13       13         1 ( EnvironmentSatisfaction … Yes         5      0.857   5  
+## 14       14         1 ( MaritalStatus %in% c('Ma… Yes        17      0.842   4.9
+## 15       15         1 ( NumCompaniesWorked > 6 )… Yes        10      0.833   4.9
+## 16       16         1 ( EnvironmentSatisfaction … Yes         8      0.8     4.7
+## 17       17         1 ( Age <= 26 ) & ( Environm… Yes        22      0.75    4.4
+## 18       18         1 ( EnvironmentSatisfaction … Yes         9      0.636   3.7
+## 19       19         1 ( EnvironmentSatisfaction … Yes        28      0.633   3.7
+## ----------------------------------------------
+
+
+

Rulelist +

+

A rulelist is expected to have these mandatory columns:

+
    +
  • +rule_nbr: Something that identifies a rule uniquely per +keys. Typically, an integer vector starting from 1.
  • +
  • +LHS: A character vector of R-parsable strings
  • +
  • +RHS: factor (for classification), numeric (for +regression) or character vector of R-parsable strings (to be +evaluated)
  • +
+

trial_nbr is a key. C5 model builds +multiple boosting iterations indexed by trial_nbr (default +is set to 1). rule_nbr’s start from 1 for each +trial_nbr. In general, keys columns along with +rule_nbr column should be unique.

+

Attribute estimation_type is central to further methods +where metrics get computed. At this moment, the package supports these: +classification, regression.

+

The rulelist (obtained from C5 model) is ordered by +confidence column, by default.

+

A rulelist can be either created using tidy on a +supported model or from a dataframe using +as_rulelist.

+
+

☺☺☺ rulelist is simply a dataframe with some attributes. Manipulate +them with standard dataframe operations (dplyr, +data.table …). tibble::as_tibble or +as.data.frame will convert to a tibble/dataframe (with +attributes). as_rulelist can be used to convert to a +rulelist.

+
+
+
+

Predict +

+

The mainstay of the package is the predict method of the +rulelist class. predict provides the first rule (in the +order as per the rulelist) that is applicable for an observation/row in +the test data. If a row is not covered by any rule, then +rule_nbr is missing.

+
+predict(tidy_c5, att_valid)
+
## # A tibble: 728 × 3
+##    row_nbr trial_nbr rule_nbr
+##      <int>     <int>    <int>
+##  1       1         1        8
+##  2       2         1        6
+##  3       3         1        6
+##  4       4         1        6
+##  5       5         1       NA
+##  6       6         1       12
+##  7       7         1       12
+##  8       8         1        5
+##  9       9         1       12
+## 10      10         1        1
+## # ℹ 718 more rows
+
+

☺☺☺ To know all rules applicable for a row, use argument +multiple = TRUE. Alternately, predict on a +ruleset always yields all rules applicable per row.

+
+
+predict(tidy_c5, att_valid, multiple = TRUE)
+
## # A tibble: 728 × 3
+##    row_nbr trial_nbr rule_nbr 
+##      <int>     <int> <list>   
+##  1       1         1 <int [2]>
+##  2       2         1 <int [1]>
+##  3       3         1 <int [1]>
+##  4       4         1 <int [2]>
+##  5       5         1 <int [1]>
+##  6       6         1 <int [1]>
+##  7       7         1 <int [1]>
+##  8       8         1 <int [2]>
+##  9       9         1 <int [1]>
+## 10      10         1 <int [3]>
+## # ℹ 718 more rows
+
+
+

Setters +

+
    +
  • set_validation_data: Setting (or removing) +validation data adds a validation data to a rulelist which gets used for +augment, calculate and other methods.

  • +
  • set_keys: Sets (or removes) keys.

  • +
+
+tidy_c5 =
+  tidy_c5 %>%
+  set_validation_data(att_valid, y_name = "Attrition", weight = 1) %>% 
+  set_keys(NULL)
+
+tidy_c5
+
## ---- Rulelist --------------------------------
+## ▶ Keys: NULL
+## ▶ Number of rules: 19
+## ▶ Model type: C5
+## ▶ Estimation type: classification
+## ▶ Is validation data set: TRUE
+## 
+## 
+##    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+##       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+##  1        1         1 ( Age > 26 ) & ( Environme… No        189      0.963   1.2
+##  2        2         1 ( Age > 26 ) & ( Environme… No        244      0.951   1.1
+##  3        3         1 ( BusinessTravel == 'Non-T… No         74      0.947   1.1
+##  4        4         1 ( Age <= 31 ) & ( Educatio… Yes        12      0.929   5.4
+##  5        5         1 ( JobSatisfaction %in% c('… No        157      0.924   1.1
+##  6        6         1 ( Age > 26 ) & ( Environme… No        351      0.924   1.1
+##  7        7         1 ( EnvironmentSatisfaction … Yes         8      0.9     5.3
+##  8        8         1 ( OverTime == 'Yes' ) & ( … Yes         8      0.9     5.3
+##  9        9         1 ( BusinessTravel %in% c('T… Yes         8      0.9     5.3
+## 10       10         1 ( EnvironmentSatisfaction … Yes         7      0.889   5.2
+## 11       11         1 ( JobInvolvement == 'Low' … Yes         7      0.889   5.2
+## 12       12         1 ( OverTime == 'No' )        No        516      0.888   1.1
+## 13       13         1 ( EnvironmentSatisfaction … Yes         5      0.857   5  
+## 14       14         1 ( MaritalStatus %in% c('Ma… Yes        17      0.842   4.9
+## 15       15         1 ( NumCompaniesWorked > 6 )… Yes        10      0.833   4.9
+## 16       16         1 ( EnvironmentSatisfaction … Yes         8      0.8     4.7
+## 17       17         1 ( Age <= 26 ) & ( Environm… Yes        22      0.75    4.4
+## 18       18         1 ( EnvironmentSatisfaction … Yes         9      0.636   3.7
+## 19       19         1 ( EnvironmentSatisfaction … Yes        28      0.633   3.7
+## ----------------------------------------------
+
+

☺☺☺ Setting weight argument (other than 1 which means equal weight) +leads to calculating weighted metrics.

+
+
+
+

Augment +

+

augment adds metrics related to validation data in a new +column ‘augmented_stats’.

+
+tidy_c5 %>% 
+  augment() %>% 
+  tibble::as_tibble() %>% 
+  tidytable::unnest(names_sep = "__") %>% 
+  glimpse()
+
## Rows: 19
+## Columns: 10
+## $ rule_nbr                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,…
+## $ trial_nbr                   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
+## $ LHS                         <chr> "( Age > 26 ) & ( EnvironmentSatisfaction …
+## $ RHS                         <fct> No, No, No, Yes, No, No, Yes, Yes, Yes, Ye…
+## $ support                     <int> 189, 244, 74, 12, 157, 351, 8, 8, 8, 7, 7,…
+## $ confidence                  <dbl> 0.9634000, 0.9512000, 0.9474000, 0.9285714…
+## $ lift                        <dbl> 1.2, 1.1, 1.1, 5.4, 1.1, 1.1, 5.3, 5.3, 5.…
+## $ augmented_stats__support    <dbl> 212, 236, 76, 13, 153, 306, 10, 17, 10, 6,…
+## $ augmented_stats__confidence <dbl> 0.89622642, 0.91101695, 0.88157895, 0.4615…
+## $ augmented_stats__lift       <dbl> 1.0285265, 1.0455004, 1.0117168, 3.5880893…
+
+

☺☺☺ If augmented metrics differ from train data metrics, then it +could indicate drift in the data!

+
+
+

☺☺☺ augment also supports custom metrics in +dplyr::summarise syntax!

+
+
+
+

Plot +

+

Plotting a rulelist as a heatmap helps in understanding these +things:

+
    +
  • Cluster of rows which are covered by same set of rules +(with hclust)
  • +
  • Cluster rules based on the common rows they cover (with +hclust)
  • +
  • Row and column labels with dependent variable help us identify +potential outliers and potential under-fitting.
  • +
+
+plot(tidy_c5)
+

+
+

☺☺☺ distance metric for rules is jaccard and distance +metric for row clusters is euclidean. Former can be changed +to any distance supported by proxy package or a custom +distance function for custom insight!

+
+
+

☺☺☺ When you have a rulelist which is a combination of multiple +classifiers, rule clusters quickly reveal ‘correlated’ rules! The ones +which cover almost same rows, but LHS of each reads different!

+
+
+
+

Calculate +

+

calculate computes cumulative metrics (as rules are +applied in the row order) depending on attribute +estimation_type.

+
+calculate(tidy_c5)
+
## # A tibble: 19 × 4
+##    rule_nbr cumulative_coverage cumulative_overlap cumulative_accuracy
+##       <int>               <dbl>              <dbl>               <dbl>
+##  1        1                 212                  0               0.896
+##  2        2                 277                171               0.910
+##  3        3                 323                187               0.895
+##  4        4                 336                187               0.878
+##  5        5                 413                217               0.877
+##  6        6                 495                301               0.875
+##  7        7                 500                306               0.866
+##  8        8                 515                308               0.852
+##  9        9                 525                308               0.844
+## 10       10                 527                312               0.843
+## 11       11                 529                314               0.841
+## 12       12                 686                456               0.848
+## 13       13                 687                460               0.849
+## 14       14                 697                462               0.845
+## 15       15                 702                466               0.840
+## 16       16                 702                470               0.840
+## 17       17                 710                480               0.835
+## 18       18                 710                484               0.835
+## 19       19                 725                495               0.826
+
+

☺☺☺ calculate allows a custom metric of your choice!

+
+
+
+

Reorder +

+

reorder intends to reorder the order of rules. At the +moment, the greedy implementation adds one rule at a time to a new +rulelist (from the input rulelist) such that a metric (see +calculate) is maximized/minimized.

+

Suppose, you wanted to find a smaller ruleset with least overlap that +would still cover 80% of the validation_data. Then,

+
+reorder(tidy_c5,
+        metric = c("cumulative_overlap",
+                   "cumulative_coverage",
+                   "cumulative_accuracy"
+                   ),
+        minimize = TRUE
+        ) %>% 
+  mutate(rel_cum_overlap = 
+           cumulative_overlap / max(cumulative_overlap),
+         rel_cum_coverage = 
+           cumulative_coverage / max(cumulative_coverage)
+         ) %>% 
+  select(rule_nbr, LHS, RHS,
+         rel_cum_overlap, rel_cum_coverage,
+         cumulative_accuracy
+         )
+
## ⠙ Reordering ... 4 done (1.4/s) | 2.8s
+
## ⠹ Reordering ... 5 done (1.3/s) | 3.7s
+
## ⠸ Reordering ... 8 done (1.1/s) | 7s
+
## ⠼ Reordering ... 11 done (1.1/s) | 10.4s
+
## ⠴ Reordering ... 14 done (1/s) | 13.4s
+
## ⠦ Reordering ... 18 done (1.1/s) | 15.9s
+
## ⠦ Reordering ... 19 done (1.2/s) | 16.1s
+
## 
+
## ---- Rulelist --------------------------------
+## ▶ Keys: NULL
+## ▶ Number of rules: 19
+## ▶ Model type: C5
+## ▶ Estimation type: classification
+## ▶ Is validation data set: TRUE
+## 
+## 
+##    rule_nbr LHS       RHS   rel_cum_overlap rel_cum_coverage cumulative_accuracy
+##       <int> <chr>     <fct>           <dbl>            <dbl>               <dbl>
+##  1       16 ( Enviro… Yes            0                0.0124               0.556
+##  2        9 ( Busine… Yes            0                0.0262               0.474
+##  3        8 ( OverTi… Yes            0                0.0497               0.444
+##  4        4 ( Age <=… Yes            0                0.0676               0.449
+##  5        3 ( Busine… No             0.0101           0.166                0.708
+##  6       15 ( NumCom… Yes            0.0141           0.175                0.677
+##  7       14 ( Marita… Yes            0.0222           0.189                0.672
+##  8       13 ( Enviro… Yes            0.0242           0.196                0.655
+##  9       12 ( OverTi… No             0.190            0.819                0.854
+## 10       17 ( Age <=… Yes            0.208            0.832                0.846
+## 11       11 ( JobInv… Yes            0.214            0.833                0.844
+## 12       10 ( Enviro… Yes            0.220            0.836                0.843
+## 13        7 ( Enviro… Yes            0.232            0.836                0.843
+## 14        6 ( Age > … No             0.638            0.942                0.837
+## 15       18 ( Enviro… Yes            0.646            0.943                0.836
+## 16        5 ( JobSat… No             0.780            0.950                0.837
+## 17        2 ( Age > … No             0.927            0.972                0.833
+## 18       19 ( Enviro… Yes            0.949            0.993                0.824
+## 19        1 ( Age > … No             1                1                    0.822
+## ----------------------------------------------
+

We infer that the first 9 rules (~ 20% overlap) in the reordered rulelist +would do, while still ensuring an accuracy of 85%!

+
+

☺☺☺ In the above code, 2nd metric onwards are used to break ties! +(similar to base::order)

+
+
+

☺☺☺ Reordering changes the decision boundaries of your fit! It is a +post-hoc method to overlap the learnt rules to optimize for the +metric you need! But remember, greedy optimization method does not guarantee +the global minima (maxima)!

+
+
+

☺☺☺ reorder comes with an init = k argument +which leaves the predecided top k rules in their order and reorders only +the bottom ones. This might be required when a policy layer needs to be +incorporated into the rule engine!

+
+
+
+

Prune +

+

prune suggests k th rule to stop at based on some +stopping criteria.

+

Suppose, we seek to find a smaller rulelist with maximum possible +accuracy with a minimum (relative) coverage of 70% and (relative) +overlap not more than half the number of rows. Then,

+
+prune_suggestion = 
+  reorder(tidy_c5, "cumulative_accuracy", minimize = FALSE) %>% 
+  prune(stop_expr_string = "relative__cumulative_coverage >= 0.7 & cumulative_overlap <= 728/2")
+
## ⠙ Reordering ... 4 done (1.5/s) | 2.6s
+
## ⠹ Reordering ... 7 done (1.2/s) | 5.8s
+
## ⠸ Reordering ... 10 done (1.1/s) | 9.2s
+
## ⠼ Reordering ... 13 done (1/s) | 12.4s
+
## ⠴ Reordering ... 16 done (1.1/s) | 15s
+
## ⠴ Reordering ... 19 done (1.2/s) | 16.1s
+
## 
+
+prune_suggestion
+
## ── Prune Suggestion ────────────────────────────────────────────────────────────
+
##  Keep first 4 out of 19
+
## 
+
##  Metrics after 4 rules:
+
## 
+
##  Run `plot(x)` for details; `x$pruned` to get pruned rulelist
+
## ────────────────────────────────────────────────────────────────────────────────
+
+plot(prune_suggestion)
+

+
+prune_suggestion$pruned
+
## ---- Rulelist --------------------------------
+## ▶ Keys: NULL
+## ▶ Number of rules: 4
+## ▶ Model type: C5
+## ▶ Estimation type: classification
+## ▶ Is validation data set: TRUE
+## 
+## 
+##   rule_nbr trial_nbr LHS      RHS   support confidence  lift cumulative_accuracy
+##      <int>     <int> <chr>    <fct>   <int>      <dbl> <dbl>               <dbl>
+## 1        4         1 ( Age <… Yes        12      0.929   5.4               0.462
+## 2        5         1 ( JobSa… No        157      0.924   1.1               0.865
+## 3        6         1 ( Age >… No        351      0.924   1.1               0.886
+## 4       12         1 ( OverT… No        516      0.888   1.1               0.883
+## ----------------------------------------------
+
+

☺☺☺ prune is powerful when combined with +reorder! While reorder chases a metric, +prune takes care of constraints! This might lead to small +rulelists, very good for explainability!

+
+
+
+

Out in the Wild +

+

Use to_sql_case to get SQL case when code chunk from a +rulelist.

+
+to_sql_case(head(tidy_c5, 5))
+
CASE
+WHEN (Age > 26)
+    AND (EnvironmentSatisfaction IN ( 'Medium', 'High', 'Very_High' ))
+    AND (PercentSalaryHike <= 17)
+    AND (StockOptionLevel > 0)
+    AND (StockOptionLevel <= 2)
+    AND (TotalWorkingYears > 2) THEN
+   'No'
+WHEN (Age > 26)
+    AND (EnvironmentSatisfaction IN ( 'Medium', 'High', 'Very_High' ))
+    AND (StockOptionLevel > 0)
+    AND (YearsAtCompany > 3) THEN
+   'No'
+WHEN (BusinessTravel = 'Non-Travel') THEN
+   'No'
+WHEN (Age <= 31)
+    AND (EducationField = 'Technical_Degree')
+    AND (StockOptionLevel <= 0) THEN
+   'Yes'
+WHEN (JobSatisfaction IN ( 'Low', 'Medium', 'High' ))
+    AND (MonthlyIncome > 3210)
+    AND (RelationshipSatisfaction IN ( 'Medium', 'High', 'Very_High' ))
+    AND (TrainingTimesLastYear > 2) THEN
+   'No'
+ELSE
+   NULL
+END AS output
+
+
+

More +

+
    +
  • We will add tidy support to more models. Your +contributions are welcome!
  • +
  • Ideas for methods are welcome!
  • +
+

✔ For dev and issues, reach us at http://github.com/talegari/tidyrules

+

✔ ‘master’ branch always holds the ‘tested’ dev code!

+

✔ Get the latest stable version from CRAN!

+

Yours truly,
+Amith (ಅಮಿತ್) and Srikanth (ಶ್ರೀಕಾಂತ)

+
+
+ + + +
+ + + + +
+ + + + + + + + diff --git a/docs/articles/using_tidyrules_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/using_tidyrules_files/figure-html/unnamed-chunk-12-1.png new file mode 100644 index 0000000..ce172d1 Binary files /dev/null and b/docs/articles/using_tidyrules_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/articles/using_tidyrules_files/figure-html/unnamed-chunk-16-1.png b/docs/articles/using_tidyrules_files/figure-html/unnamed-chunk-16-1.png new file mode 100644 index 0000000..30d5a64 Binary files /dev/null and b/docs/articles/using_tidyrules_files/figure-html/unnamed-chunk-16-1.png differ diff --git a/docs/authors.html b/docs/authors.html index 75c5bcc..dba7cdd 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,66 +1,12 @@ - - - - - - - -Authors • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • tidyrules + + - - - - - -
-
-
- -
+
- @@ -142,22 +90,20 @@

Authors

-
- +
- - + + diff --git a/docs/index.html b/docs/index.html index 9fcd74d..4f4eb92 100644 --- a/docs/index.html +++ b/docs/index.html @@ -5,20 +5,22 @@ -Obtain Rules from Rule Based Models as Tidy Dataframe • tidyrules - +Utilities to Retrieve Rulelists from Model Fits, Filter, Prune, Reorder and Predict on unseen data • tidyrules + - - + + + +
-
- +
+ -

tidyrules converts texual rules from models to dataframes with parseable rules. Supported models are: C5, cubist and rpart.

-
-

-Example

-
library(tidyrules)
-
model_c5 = C50::C5.0(Species ~ ., data = iris, rules = TRUE)
-summary(model_c5)
-#> 
-#> Call:
-#> C5.0.formula(formula = Species ~ ., data = iris, rules = TRUE)
-#> 
-#> 
-#> C5.0 [Release 2.07 GPL Edition]      Tue Dec 10 14:47:18 2019
-#> -------------------------------
-#> 
-#> Class specified by attribute `outcome'
-#> 
-#> Read 150 cases (5 attributes) from undefined.data
-#> 
-#> Rules:
-#> 
-#> Rule 1: (50, lift 2.9)
-#>  Petal.Length <= 1.9
-#>  ->  class setosa  [0.981]
-#> 
-#> Rule 2: (48/1, lift 2.9)
-#>  Petal.Length > 1.9
-#>  Petal.Length <= 4.9
-#>  Petal.Width <= 1.7
-#>  ->  class versicolor  [0.960]
-#> 
-#> Rule 3: (46/1, lift 2.9)
-#>  Petal.Width > 1.7
-#>  ->  class virginica  [0.958]
-#> 
-#> Rule 4: (46/2, lift 2.8)
-#>  Petal.Length > 4.9
-#>  ->  class virginica  [0.938]
-#> 
-#> Default class: setosa
-#> 
-#> 
-#> Evaluation on training data (150 cases):
-#> 
-#>          Rules     
-#>    ----------------
-#>      No      Errors
-#> 
-#>       4    4( 2.7%)   <<
-#> 
-#> 
-#>     (a)   (b)   (c)    <-classified as
-#>    ----  ----  ----
-#>      50                (a): class setosa
-#>            47     3    (b): class versicolor
-#>             1    49    (c): class virginica
-#> 
-#> 
-#>  Attribute usage:
-#> 
-#>   96.00% Petal.Length
-#>   62.67% Petal.Width
-#> 
-#> 
-#> Time: 0.0 secs
-

Tidy the rules:

-
pander::pandoc.table(tidyRules(model_c5), split.tables = 120)
-#> 
-#> ----------------------------------------------------------------------------------------------------
-#>  id            LHS               RHS       support   confidence   lift   rule_number   trial_number 
-#> ---- ----------------------- ------------ --------- ------------ ------ ------------- --------------
-#>  1     Petal.Length <= 1.9      setosa       50        0.9808     2.9         1             1       
-#> 
-#>  2    Petal.Length > 1.9 &    versicolor     48         0.96      2.9         2             1       
-#>       Petal.Length <= 4.9 &                                                                         
-#>        Petal.Width <= 1.7                                                                           
-#> 
-#>  3      Petal.Width > 1.7     virginica      46        0.9583     2.9         3             1       
-#> 
-#>  4     Petal.Length > 4.9     virginica      46        0.9375     2.8         4             1       
-#> ----------------------------------------------------------------------------------------------------
+
+

tidyrules R package provides a framework to work with decision rules. Rules can be extracted from supported models, augmented with (custom) metrics using validation data, manipulated using standard dataframe operations, reordered and pruned based on a metric, predict on unseen (test) data. Utilities include; Creating a rulelist manually, Exporting a rulelist as a SQL case statement and so on. The package offers two classes; rulelist and ruleset based on dataframe.

+
+

+
+

Example +

+
+expand/collapse + +
+model_c5 = C50::C5.0(Species ~ ., data = iris, rules = TRUE)
+pander::pandoc.table(tidy(model_c5), split.tables = 120)
+#> 
+#> ----------------------------------------------------------------------------------------------
+#>  rule_nbr   trial_nbr              LHS                  RHS       support   confidence   lift 
+#> ---------- ----------- ---------------------------- ------------ --------- ------------ ------
+#>     1           1        ( Petal.Length <= 1.9 )       setosa       50        0.9808     2.9  
+#> 
+#>     2           1       ( Petal.Length > 1.9 ) & (   versicolor     48         0.96      2.9  
+#>                         Petal.Length <= 4.9 ) & (                                             
+#>                            Petal.Width <= 1.7 )                                               
+#> 
+#>     3           1         ( Petal.Width > 1.7 )      virginica      46        0.9583     2.9  
+#> 
+#>     4           1         ( Petal.Length > 4.9 )     virginica      46        0.9375     2.8  
+#> ----------------------------------------------------------------------------------------------
+
-
-

-Installation

-

You can install the released version of tidyrules from CRAN with:

-
install.packages("tidyrules")
-

And the development version from GitHub with:

-
# install.packages("devtools")
-devtools::install_github("talegari/tidyrules")
+
+

Installation +

+
+expand/collapse +

You can install the released version of tidyrules from CRAN with:

+
+install.packages("tidyrules")
+

And the development version from GitHub with:

+
+# install.packages("devtools")
+devtools::install_github("talegari/tidyrules")
+
-

Site built with pkgdown 1.5.1.

+

+

Site built with pkgdown 2.0.9.

@@ -231,5 +191,7 @@

Dev status

+ + diff --git a/docs/news/index.html b/docs/news/index.html index 87c88db..0c0b38d 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,66 +1,12 @@ - - - - - - - -Changelog • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Changelog • tidyrules - - + + - - -
-
- -
- -
+
-
-

-tidyrules 0.1.5 Unreleased -

-
    -
  • Maintenance release (replace package rsample with modeldata)
  • -
-
-
-

-tidyrules 0.1.4 Unreleased -

-
    -
  • Added rules parsable in python and SQL (default: R)
  • -
-
-
-

-tidyrules 0.1.3 2020-01-30 -

-
    -
  • Rules for rpart regression model (issue)
  • -
-
-
-

-tidyrules 0.1.2 Unreleased -

-
    -
  • Default option to compute confidence for C5 models is now implemented with laplace correction (issue)
  • -
-
+
+ +
  • Major rewrite of tidyrules +
    • rulelist class introduced with many methods, mainly predict +
    • +
    • breaking change: tidyRules function no longer exists!
    • +
    • Support added to party models
    • +
  • +
+
+ +
  • Maintenance release (replace package rsample with modeldata)
  • +
+
+ +
  • Added rules parsable in python and SQL (default: R)
  • +
+
+ +
  • Rules for rpart regression model (issue)
  • +
+
+ +
  • Default option to compute confidence for C5 models is now implemented with laplace correction (issue)
  • +
+
-
- +
- - + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index c01e592..80ea5b8 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -56,8 +56,10 @@ img.icon { float: right; } -img { +/* Ensure in-page images don't run outside their container */ +.contents img { max-width: 100%; + height: auto; } /* Fix bug in bootstrap (only seen in firefox) */ @@ -78,11 +80,10 @@ dd { /* Section anchors ---------------------------------*/ a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; + display: none; + margin-left: 5px; + width: 20px; + height: 20px; background-image: url(./link.svg); background-repeat: no-repeat; @@ -90,17 +91,15 @@ a.anchor { background-position: center center; } -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; } - /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { @@ -244,14 +243,14 @@ nav[data-toggle='toc'] .nav .nav > .active:focus > a { .ref-index th {font-weight: normal;} -.ref-index td {vertical-align: top;} +.ref-index td {vertical-align: top; min-width: 100px} .ref-index .icon {width: 40px;} .ref-index .alias {width: 40%;} .ref-index-icons .alias {width: calc(40% - 40px);} .ref-index .title {width: 60%;} .ref-arguments th {text-align: right; padding-right: 10px;} -.ref-arguments th, .ref-arguments td {vertical-align: top;} +.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} .ref-arguments .name {width: 20%;} .ref-arguments .desc {width: 80%;} @@ -264,31 +263,26 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -pre { - word-wrap: normal; - word-break: normal; - border: 1px solid #eee; -} - -pre, code { +pre, code, pre code { 
background-color: #f8f8f8; color: #333; } +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} -pre code { - overflow: auto; - word-wrap: normal; - white-space: pre; +pre { + border: 1px solid #eee; } -pre .img { +pre .img, pre .r-plt { margin: 5px 0; } -pre .img img { +pre .img img, pre .r-plt img { background-color: #fff; - display: block; - height: auto; } code a, pre a { @@ -305,9 +299,8 @@ a.sourceLine:hover { .kw {color: #264D66;} /* keyword */ .co {color: #888888;} /* comment */ -.message { color: black; font-weight: bolder;} -.error { color: orange; font-weight: bolder;} -.warning { color: #6A0366; font-weight: bolder;} +.error {font-weight: bolder;} +.warning {font-weight: bolder;} /* Clipboard --------------------------*/ @@ -365,3 +358,27 @@ mark { content: ""; } } + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 7e7048f..6f0eee4 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -80,7 +80,7 @@ $(document).ready(function() { var copyButton = ""; - $(".examples, div.sourceCode").addClass("hasCopyButton"); + $("div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); @@ -91,7 +91,7 @@ // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 267be8e..7ed8b17 100644 --- a/docs/pkgdown.yml +++ 
b/docs/pkgdown.yml @@ -1,7 +1,7 @@ -pandoc: 2.7.3 -pkgdown: 1.5.1 +pandoc: 3.1.11 +pkgdown: 2.0.9 pkgdown_sha: ~ articles: - tidyrules_vignette: tidyrules_vignette.html -last_built: 2020-06-04T12:05Z + using_tidyrules: using_tidyrules.html +last_built: 2024-06-27T19:57Z diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png new file mode 100644 index 0000000..17a3580 Binary files /dev/null and b/docs/reference/Rplot001.png differ diff --git a/docs/reference/Rplot002.png b/docs/reference/Rplot002.png new file mode 100644 index 0000000..8cda292 Binary files /dev/null and b/docs/reference/Rplot002.png differ diff --git a/docs/reference/Rplot003.png b/docs/reference/Rplot003.png new file mode 100644 index 0000000..a039152 Binary files /dev/null and b/docs/reference/Rplot003.png differ diff --git a/docs/reference/addBackquotes.html b/docs/reference/addBackquotes.html index 150a26f..c9dcc37 100644 --- a/docs/reference/addBackquotes.html +++ b/docs/reference/addBackquotes.html @@ -1,67 +1,12 @@ - - - - - - - -Add backquotes — addBackquotes • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Add backquotes — addBackquotes • tidyrules - - + + - - -
-
- -
- -
+
@@ -132,51 +62,53 @@

Add backquotes

(vectorized) Add backquotes when a string has a space in it

-
addBackquotes(string)
- -

Arguments

- - - - - - -
string

character vector

+
+
addBackquotes(string)
+
-

Value

+
+

Arguments

+
string
+

character vector

-

character vector

+
+
+

Value

+ -

Examples

-
# \donttest{ -tidyrules:::addBackquotes(c("ab", "a b"))
#> [1] "ab" "`a b`"
# } +

character vector

+
-
+
+

Examples

+
# \donttest{
+tidyrules:::addBackquotes(c("ab", "a b"))
+#> [1] "ab"    "`a b`"
+# }
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/as_rulelist.data.frame.html b/docs/reference/as_rulelist.data.frame.html new file mode 100644 index 0000000..07b4103 --- /dev/null +++ b/docs/reference/as_rulelist.data.frame.html @@ -0,0 +1,157 @@ + +as_rulelist method for a data.frame — as_rulelist.data.frame • tidyrules + + +
+
+ + + +
+
+ + +
+

Convert a set of rules in a dataframe to a rulelist

+
+ +
+
# S3 method for data.frame
+as_rulelist(x, keys = NULL, model_type = NULL, estimation_type, ...)
+
+ +
+

Arguments

+
x
+

dataframe to be coerced to a rulelist

+ + +
keys
+

(character vector, default: NULL) column names which form the key

+ + +
model_type
+

(string, default: NULL) Name of the model which generated +the rules

+ + +
estimation_type
+

(string) One among: 'regression', +'classification'

+ + +
...
+

currently unused

+ +
+
+

Value

+ + +

rulelist object

+
+
+

Details

+

Input dataframe should contain these columns: rule_nbr, LHS, +RHS. Providing other inputs helps augment better.

+
+ + +
+

Examples

+
rules_df = tidytable::tidytable(rule_nbr = 1:2,
+                                LHS      = c("var_1 > 50", "var_2 < 30"),
+                                RHS      = c(2, 1)
+                                )
+as_rulelist(rules_df, estimation_type = "regression")
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 2
+#> ▶ Model Type: NULL
+#> ▶ Estimation type: regression
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr LHS          RHS
+#>      <int> <chr>      <dbl>
+#> 1        1 var_1 > 50     2
+#> 2        2 var_2 < 30     1
+#> ----------------------------------------------
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/as_rulelist.html b/docs/reference/as_rulelist.html new file mode 100644 index 0000000..450a248 --- /dev/null +++ b/docs/reference/as_rulelist.html @@ -0,0 +1,117 @@ + +as_rulelist generic from tidyrules package — as_rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

as_rulelist generic

+
+ +
+
as_rulelist(x, ...)
+
+ +
+

Arguments

+
x
+

object to be coerced to a rulelist

+ + +
...
+

for methods to use

+ +
+
+

Value

+ + +

A rulelist

+ + +
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/as_ruleset.html b/docs/reference/as_ruleset.html new file mode 100644 index 0000000..e33e5a1 --- /dev/null +++ b/docs/reference/as_ruleset.html @@ -0,0 +1,137 @@ + +Get a ruleset from a rulelist — as_ruleset • tidyrules + + +
+
+ + + +
+
+ + +
+

Returns a ruleset object

+
+ +
+
as_ruleset(rulelist)
+
+ +
+

Arguments

+
rulelist
+

A rulelist

+ +
+
+

Value

+ + +

A ruleset

+ + +
+
+

See also

+ +
+ +
+

Examples

+
model_class_party = partykit::ctree(species ~ .,
+                                    data = palmerpenguins::penguins
+                                    )
+as_ruleset(tidy(model_class_party))
+#> ---- Ruleset -------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 6
+#> ▶ Model type: constparty
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr LHS                   RHS   support confidence  lift terminal_node_id
+#>      <int> <chr>                 <fct>   <dbl>      <dbl> <dbl> <chr>           
+#> 1        1 ( island %in% c('Bis… Gent…     123      1      2.77 6               
+#> 2        2 ( island %in% c('Bis… Adel…      38      1      2.26 4               
+#> 3        3 ( island %in% c('Dre… Adel…     100      0.99   2.24 9               
+#> 4        4 ( island %in% c('Dre… Chin…      64      0.969  4.90 11              
+#> 5        5 ( island %in% c('Bis… Adel…       7      0.857  1.94 5               
+#> 6        6 ( island %in% c('Dre… Adel…      12      0.583  1.32 10              
+#> --------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/augment.html b/docs/reference/augment.html new file mode 100644 index 0000000..d356b9c --- /dev/null +++ b/docs/reference/augment.html @@ -0,0 +1,109 @@ + +augment is re-export of generics::augment from tidyrules package — augment • tidyrules + + +
+
+ + + +
+
+ + + + +
+
augment(x, ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
...
+

For methods to use

+ +
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/augment.rulelist.html b/docs/reference/augment.rulelist.html new file mode 100644 index 0000000..d95b540 --- /dev/null +++ b/docs/reference/augment.rulelist.html @@ -0,0 +1,410 @@ + +Augment a rulelist — augment.rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

augment outputs a rulelist with an additional column named +augmented_stats based on summary statistics calculated using attribute +validation_data.

+
+ +
+
# S3 method for rulelist
+augment(x, ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
...
+

(expressions) To be sent to tidytable::summarise for custom +aggregations. See examples.

+ +
+
+

Value

+ + +

A rulelist with a new dataframe-column named augmented_stats.

+
+
+

Details

+

The dataframe-column augmented_stats will have these columns +corresponding to the estimation_type:

  • For regression: support, IQR, RMSE

  • +
  • For classification: support, confidence, lift

  • +

along with custom aggregations.

+
+
+

See also

+ +
+ +
+

Examples

+
# Examples for augment ------------------------------------------------------
+library("magrittr")
+
+# C5 ----
+att = modeldata::attrition
+set.seed(100)
+train_index = sample(c(TRUE, FALSE), nrow(att), replace = TRUE)
+
+model_c5 = C50::C5.0(Attrition ~., data = att[train_index, ], rules = TRUE)
+tidy_c5  =
+  model_c5 %>%
+  tidy() %>%
+  set_validation_data(att[!train_index, ], "Attrition")
+
+tidy_c5
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 23
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( Age > 30 ) & ( DistanceF… No         69      0.986   1.2
+#>  2        2         1 ( DistanceFromHome <= 12 )… No        149      0.960   1.1
+#>  3        3         1 ( Department == 'Research_… No        211      0.953   1.1
+#>  4        4         1 ( Age > 30 ) & ( DistanceF… No        249      0.948   1.1
+#>  5        5         1 ( JobInvolvement %in% c('M… No        353      0.944   1.1
+#>  6        6         1 ( OverTime == 'No' ) & ( S… No        263      0.943   1.1
+#>  7        7         1 ( Education %in% c('Master… No        101      0.942   1.1
+#>  8        8         1 ( OverTime == 'No' ) & ( R… No         95      0.938   1.1
+#>  9        9         1 ( BusinessTravel %in% c('N… No        352      0.915   1.1
+#> 10       10         1 ( Education %in% c('Below_… No        265      0.910   1.1
+#> # ℹ 13 more rows
+#> ----------------------------------------------
+
+augment(tidy_c5) %>%
+  tidytable::unnest(augmented_stats, names_sep = "__") %>%
+  tidytable::glimpse()
+#> Rows: 23
+#> Columns: 10
+#> $ rule_nbr                    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,…
+#> $ trial_nbr                   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
+#> $ LHS                         <chr> "( Age > 30 ) & ( DistanceFromHome <= 12 )…
+#> $ RHS                         <fct> No, No, No, No, No, No, No, No, No, No, Ye…
+#> $ support                     <int> 69, 149, 211, 249, 353, 263, 101, 95, 352,…
+#> $ confidence                  <dbl> 0.9859155, 0.9603000, 0.9531000, 0.9482000…
+#> $ lift                        <dbl> 1.2, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.…
+#> $ augmented_stats__support    <dbl> 77, 122, 245, 282, 376, 305, 84, 111, 390,…
+#> $ augmented_stats__confidence <dbl> 0.9220779, 0.9098361, 0.9346939, 0.9113475…
+#> $ augmented_stats__lift       <dbl> 9.3667749, 1.0091812, 1.0367533, 1.0108577…
+
+# augment with custom aggregator
+augment(tidy_c5,output_counts = list(table(Attrition))) %>%
+  tidytable::unnest(augmented_stats, names_sep = "__") %>%
+  tidytable::glimpse()
+#> Rows: 23
+#> Columns: 11
+#> $ rule_nbr                       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …
+#> $ trial_nbr                      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
+#> $ LHS                            <chr> "( Age > 30 ) & ( DistanceFromHome <= 1…
+#> $ RHS                            <fct> No, No, No, No, No, No, No, No, No, No,…
+#> $ support                        <int> 69, 149, 211, 249, 353, 263, 101, 95, 3…
+#> $ confidence                     <dbl> 0.9859155, 0.9603000, 0.9531000, 0.9482…
+#> $ lift                           <dbl> 1.2, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1,…
+#> $ augmented_stats__support       <dbl> 77, 122, 245, 282, 376, 305, 84, 111, 3…
+#> $ augmented_stats__confidence    <dbl> 0.9220779, 0.9098361, 0.9346939, 0.9113…
+#> $ augmented_stats__output_counts <list> <<table[2]>>, <<table[2]>>, <<table[2]…
+#> $ augmented_stats__lift          <dbl> 9.3667749, 1.0091812, 1.0367533, 1.0108…
+
+# rpart ----
+set.seed(100)
+train_index = sample(c(TRUE, FALSE), nrow(iris), replace = TRUE)
+
+model_class_rpart = rpart::rpart(Species ~ ., data = iris[train_index, ])
+tidy_class_rpart  = tidy(model_class_rpart) %>%
+  set_validation_data(iris[!train_index, ], "Species")
+tidy_class_rpart
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 3
+#> ▶ Model type: rpart
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr LHS                                    RHS   support confidence  lift
+#>      <int> <chr>                                  <fct>   <int>      <dbl> <dbl>
+#> 1        1 ( Petal.Width < 1.7 ) & ( Petal.Lengt… vers…      28      0.967  2.53
+#> 2        2 ( Petal.Width < 1.7 ) & ( Petal.Lengt… seto…      21      0.957  3.46
+#> 3        3 ( Petal.Width >= 1.7 )                 virg…      27      0.931  2.72
+#> ----------------------------------------------
+
+model_regr_rpart = rpart::rpart(Sepal.Length ~ ., data = iris[train_index, ])
+tidy_regr_rpart  = tidy(model_regr_rpart) %>%
+  set_validation_data(iris[!train_index, ], "Sepal.Length")
+tidy_regr_rpart
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 5
+#> ▶ Model type: rpart
+#> ▶ Estimation type: regression
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr LHS                                                       RHS support
+#>      <int> <chr>                                                   <dbl>   <int>
+#> 1        1 ( Petal.Length >= 4.25 ) & ( Petal.Length < 5.85 ) & (…  6.15      23
+#> 2        2 ( Petal.Length < 4.25 ) & ( Petal.Length < 1.65 )        4.99      17
+#> 3        3 ( Petal.Length < 4.25 ) & ( Petal.Length >= 1.65 )       5.6       16
+#> 4        4 ( Petal.Length >= 4.25 ) & ( Petal.Length < 5.85 ) & (…  6.53      12
+#> 5        5 ( Petal.Length >= 4.25 ) & ( Petal.Length >= 5.85 )      7.34       8
+#> ----------------------------------------------
+
+# augment (classification case)
+augment(tidy_class_rpart) %>%
+  tidytable::unnest(augmented_stats, names_sep = "__") %>%
+  tidytable::glimpse()
+#> Rows: 3
+#> Columns: 9
+#> $ rule_nbr                    <int> 1, 2, 3
+#> $ LHS                         <chr> "( Petal.Width < 1.7 ) & ( Petal.Length >=…
+#> $ RHS                         <fct> versicolor, setosa, virginica
+#> $ support                     <int> 28, 21, 27
+#> $ confidence                  <dbl> 0.9666667, 0.9565217, 0.9310345
+#> $ lift                        <dbl> 2.533333, 3.461698, 2.721485
+#> $ augmented_stats__support    <dbl> 24, 29, 21
+#> $ augmented_stats__confidence <dbl> 0.8333333, 1.0000000, 0.9523810
+#> $ augmented_stats__lift       <dbl> 2.936508, 2.551724, 3.356009
+
+# augment (regression case)
+augment(tidy_regr_rpart) %>%
+  tidytable::unnest(augmented_stats, names_sep = "__") %>%
+  tidytable::glimpse()
+#> Rows: 5
+#> Columns: 7
+#> $ rule_nbr                 <int> 1, 2, 3, 4, 5
+#> $ LHS                      <chr> "( Petal.Length >= 4.25 ) & ( Petal.Length < …
+#> $ RHS                      <dbl> 6.147826, 4.988235, 5.600000, 6.533333, 7.337…
+#> $ support                  <int> 23, 17, 16, 12, 8
+#> $ augmented_stats__support <dbl> 20, 27, 13, 9, 5
+#> $ augmented_stats__IQR     <dbl> 0.60, 0.45, 0.60, 0.50, 0.40
+#> $ augmented_stats__RMSE    <dbl> 0.5085832, 0.3548713, 0.4497863, 0.3294215, 0…
+
+# party ----
+pen = palmerpenguins::penguins %>%
+  tidytable::drop_na(bill_length_mm)
+set.seed(100)
+train_index = sample(c(TRUE, FALSE), nrow(pen), replace = TRUE)
+
+model_class_party = partykit::ctree(species ~ ., data = pen[train_index, ])
+tidy_class_party  = tidy(model_class_party) %>%
+  set_validation_data(pen[!train_index, ], "species")
+tidy_class_party
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 5
+#> ▶ Model type: constparty
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr LHS                   RHS   support confidence  lift terminal_node_id
+#>      <int> <chr>                 <fct>   <dbl>      <dbl> <dbl> <chr>           
+#> 1        1 ( flipper_length_mm … Gent…      50      1      3.02 8               
+#> 2        2 ( flipper_length_mm … Adel…      73      0.986  2.08 3               
+#> 3        3 ( flipper_length_mm … Chin…      17      0.941  4.86 5               
+#> 4        4 ( flipper_length_mm … Chin…       7      0.714  3.69 9               
+#> 5        5 ( flipper_length_mm … Chin…      13      0.692  3.57 6               
+#> ----------------------------------------------
+
+model_regr_party =
+  partykit::ctree(bill_length_mm ~ ., data = pen[train_index, ])
+tidy_regr_party  = tidy(model_regr_party) %>%
+  set_validation_data(pen[!train_index, ], "bill_length_mm")
+tidy_regr_party
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 5
+#> ▶ Model type: constparty
+#> ▶ Estimation type: regression
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr LHS                          RHS support   IQR  RMSE terminal_node_id
+#>      <int> <chr>                      <dbl>   <dbl> <dbl> <dbl> <chr>           
+#> 1        1 ( species %in% c('Chinstr…  51.6      13  2     1.78 9               
+#> 2        2 ( species %in% c('Adelie'…  37.3      41  2.80  1.83 3               
+#> 3        3 ( species %in% c('Adelie'…  40.4      35  2.55  2.08 4               
+#> 4        4 ( species %in% c('Chinstr…  49.0      29  2     2.12 8               
+#> 5        5 ( species %in% c('Chinstr…  45.9      42  2.88  2.50 6               
+#> ----------------------------------------------
+
+# augment (classification case)
+augment(tidy_class_party) %>%
+  tidytable::unnest(augmented_stats, names_sep = "__") %>%
+  tidytable::glimpse()
+#> Rows: 5
+#> Columns: 10
+#> $ rule_nbr                    <int> 1, 2, 3, 4, 5
+#> $ LHS                         <chr> "( flipper_length_mm > 205 ) & ( bill_dept…
+#> $ RHS                         <fct> Gentoo, Adelie, Chinstrap, Chinstrap, Chin…
+#> $ support                     <dbl> 50, 73, 17, 7, 13
+#> $ confidence                  <dbl> 1.0000000, 0.9863014, 0.9411765, 0.7142857…
+#> $ lift                        <dbl> 3.018868, 2.076424, 4.857685, 3.686636, 3.…
+#> $ terminal_node_id            <chr> "8", "3", "5", "9", "6"
+#> $ augmented_stats__support    <dbl> 69, 66, 19, 4, 24
+#> $ augmented_stats__confidence <dbl> 1.0000000, 1.0000000, 0.8947368, 0.2500000…
+#> $ augmented_stats__lift       <dbl> 2.6000000, 2.4266667, 2.1712281, 0.6066667…
+
+# augment (regression case)
+augment(tidy_regr_party) %>%
+  tidytable::unnest(augmented_stats, names_sep = "__") %>%
+  tidytable::glimpse()
+#> Rows: 5
+#> Columns: 10
+#> $ rule_nbr                 <int> 1, 2, 3, 4, 5
+#> $ LHS                      <chr> "( species %in% c('Chinstrap', 'Gentoo') ) & …
+#> $ RHS                      <dbl> 51.59231, 37.29512, 40.37143, 49.04828, 45.87…
+#> $ support                  <dbl> 13, 41, 35, 29, 42
+#> $ IQR                      <dbl> 2.000, 2.800, 2.550, 2.000, 2.875
+#> $ RMSE                     <dbl> 1.778704, 1.827694, 2.080875, 2.124183, 2.499…
+#> $ terminal_node_id         <chr> "9", "3", "4", "8", "6"
+#> $ augmented_stats__support <dbl> 16, 32, 39, 39, 51
+#> $ augmented_stats__IQR     <dbl> 1.525, 2.900, 2.750, 3.100, 1.500
+#> $ augmented_stats__RMSE    <dbl> 1.336547, 2.232729, 2.420163, 3.048512, 2.499…
+
+# cubist ----
+att         = modeldata::attrition
+set.seed(100)
+train_index = sample(c(TRUE, FALSE), nrow(att), replace = TRUE)
+cols_att    = setdiff(colnames(att), c("MonthlyIncome", "Attrition"))
+
+model_cubist = Cubist::cubist(x = att[train_index, cols_att],
+                              y = att[train_index, "MonthlyIncome"]
+                              )
+
+tidy_cubist = tidy(model_cubist) %>%
+  set_validation_data(att[!train_index, ], "MonthlyIncome")
+tidy_cubist
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: committee
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 4
+#> ▶ Model type: cubist
+#> ▶ Estimation type: regression
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr committee LHS                  RHS   support   mean   min   max error
+#>      <int>     <int> <chr>                <chr>   <int>  <dbl> <dbl> <dbl> <dbl>
+#> 1        1         1 ( JobLevel > 1 ) & … (455…      33  4436.  2272  5301  392.
+#> 2        2         1 ( JobLevel <= 1 )    (110…     251  2789   1081  4968  563.
+#> 3        3         1 ( JobRole %in% c('M… (299…      89 16714  11031 19999  761 
+#> 4        4         1 ( JobLevel > 1 ) & … (-14…     334  6843.  2306 13973 1036.
+#> ----------------------------------------------
+
+augment(tidy_cubist) %>%
+  tidytable::unnest(augmented_stats, names_sep = "__") %>%
+  tidytable::glimpse()
+#> Rows: 4
+#> Columns: 12
+#> $ rule_nbr                 <int> 1, 2, 3, 4
+#> $ committee                <int> 1, 1, 1, 1
+#> $ LHS                      <chr> "( JobLevel > 1 ) & ( TotalWorkingYears <= 5 …
+#> $ RHS                      <chr> "(4559)", "(1108) + (874 * JobLevel) + (48 * …
+#> $ support                  <int> 33, 251, 89, 334
+#> $ mean                     <dbl> 4435.6, 2789.0, 16714.0, 6842.9
+#> $ min                      <dbl> 2272, 1081, 11031, 2306
+#> $ max                      <dbl> 5301, 4968, 19999, 13973
+#> $ error                    <dbl> 391.7, 562.7, 761.0, 1035.9
+#> $ augmented_stats__support <dbl> 24, 292, 93, 354
+#> $ augmented_stats__IQR     <dbl> 439.25, 906.00, 4950.00, 3685.50
+#> $ augmented_stats__RMSE    <dbl> 283.9729, 754.9632, 865.8137, 1446.1057
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/augment_class_keys.html b/docs/reference/augment_class_keys.html new file mode 100644 index 0000000..88f6cf9 --- /dev/null +++ b/docs/reference/augment_class_keys.html @@ -0,0 +1,95 @@ + +as the name says — augment_class_keys • tidyrules + + +
+
+ + + +
+
+ + +
+

as the name says +not to be exported

+
+ +
+
augment_class_keys(x, new_data, y_name, weight, ...)
+
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/augment_class_no_keys.html b/docs/reference/augment_class_no_keys.html new file mode 100644 index 0000000..df8ee1c --- /dev/null +++ b/docs/reference/augment_class_no_keys.html @@ -0,0 +1,95 @@ + +as the name says — augment_class_no_keys • tidyrules + + +
+
+ + + +
+
+ + +
+

as the name says +not to be exported

+
+ +
+
augment_class_no_keys(x, new_data, y_name, weight, ...)
+
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/augment_regr_keys.html b/docs/reference/augment_regr_keys.html new file mode 100644 index 0000000..daabc7e --- /dev/null +++ b/docs/reference/augment_regr_keys.html @@ -0,0 +1,95 @@ + +as the name says — augment_regr_keys • tidyrules + + +
+
+ + + +
+
+ + +
+

as the name says +not to be exported

+
+ +
+
augment_regr_keys(x, new_data, y_name, weight, ...)
+
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/augment_regr_no_keys.html b/docs/reference/augment_regr_no_keys.html new file mode 100644 index 0000000..b2b7f5e --- /dev/null +++ b/docs/reference/augment_regr_no_keys.html @@ -0,0 +1,95 @@ + +as the name says — augment_regr_no_keys • tidyrules + + +
+
+ + + +
+
+ + +
+

as the name says +not to be exported

+
+ +
+
augment_regr_no_keys(x, new_data, y_name, weight, ...)
+
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/calculate.html b/docs/reference/calculate.html new file mode 100644 index 0000000..162efba --- /dev/null +++ b/docs/reference/calculate.html @@ -0,0 +1,109 @@ + +calculate is re-export of generics::calculate from tidyrules package — calculate • tidyrules + + +
+
+ + + +
+
+ + + + +
+
calculate(x, ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
...
+

See calculate.rulelist

+ +
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/calculate.rulelist.html b/docs/reference/calculate.rulelist.html new file mode 100644 index 0000000..6e67f09 --- /dev/null +++ b/docs/reference/calculate.rulelist.html @@ -0,0 +1,246 @@ + +calculate metrics for a rulelist — calculate.rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Computes some metrics (based on estimation_type) in cumulative +window function style over the rulelist (in the same order) ignoring the +keys.

+
+ +
+
# S3 method for rulelist
+calculate(x, metrics_to_exclude = NULL, ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
metrics_to_exclude
+

(character vector) Names of metrics to exclude

+ + +
...
+

Named list of custom metrics. See 'details'.

+ +
+
+

Value

+ + +

A dataframe of metrics with a rule_nbr column.

+
+
+

Details

+ +
+

Default Metrics

+ + +

These metrics are calculated by default:

  • cumulative_coverage: For nth rule in the rulelist, number of distinct row_nbrs (of new_data) covered by nth and all preceding rules (in order). In weighted case, we sum the weights corresponding to the distinct row_nbrs.

  • +
  • cumulative_overlap: Up to the nth rule in the rulelist, number of distinct row_nbrs (of new_data) already covered by some preceding rule (in order). In weighted case, we sum the weights corresponding to the distinct row_nbrs.

  • +

For classification:

  • cumulative_accuracy: For nth rule in the rulelist, fraction of row_nbrs such that RHS matches the y_name column (of new_data) by nth and all preceding rules (in order). In weighted case, weighted accuracy is computed.

  • +

For regression:

  • cumulative_RMSE: For nth rule in the rulelist, weighted RMSE of all predictions (RHS) predicted by nth rule and all preceding rules.

  • +
+ +
+

Custom metrics

+ + +

Custom metrics to be computed should be passed as a named list of function(s) in +.... The custom metric function should take these arguments in the same order: +rulelist, new_data, y_name, weight. The custom metric function should +return a numeric vector of same length as the number of rows of rulelist.

+
+ +
+
+

See also

+ +
+ +
+

Examples

+
library("magrittr")
+model_c5  = C50::C5.0(Attrition ~., data = modeldata::attrition, rules = TRUE)
+tidy_c5   = tidy(model_c5) %>%
+            set_validation_data(modeldata::attrition, "Attrition") %>%
+            set_keys(NULL)
+
+# calculate default metrics (classification)
+calculate(tidy_c5)
+
+model_rpart = rpart::rpart(MonthlyIncome ~., data = modeldata::attrition)
+tidy_rpart  =
+  tidy(model_rpart) %>%
+  set_validation_data(modeldata::attrition, "MonthlyIncome") %>%
+  set_keys(NULL)
+
+# calculate default metrics (regression)
+calculate(tidy_rpart)
+
+# calculate default metrics with a custom metric
+# custom function to get cumulative MAE
+library("tidytable")
+#> 
+#> Attaching package: ‘tidytable’
+#> The following object is masked from ‘package:magrittr’:
+#> 
+#>     extract
+#> The following objects are masked from ‘package:stats’:
+#> 
+#>     dt, filter, lag
+#> The following object is masked from ‘package:base’:
+#> 
+#>     %in%
+# Custom metric for `calculate()`: cumulative (weighted) mean absolute error.
+#
+# Arguments (same order that `calculate()` documents for custom metrics):
+#   rulelist : the rulelist being evaluated
+#   new_data : dataframe holding the column named by `y_name`
+#   y_name   : (string) name of the outcome column in `new_data`
+#   weight   : weights handed to `MetricsWeighted::mae()`
+#
+# Returns a numeric vector with one value per row of `rulelist`; the n-th
+# value is the MAE over the rows predicted by the first n rules (row order).
+get_cumulative_MAE = function(rulelist, new_data, y_name, weight){
+
+  # priority of a rule is its position (row order) in the rulelist;
+  # seq_len() keeps this well-defined for a zero-row rulelist
+  priority_df =
+    rulelist %>%
+    select(rule_nbr) %>%
+    mutate(priority = seq_len(nrow(rulelist))) %>%
+    select(rule_nbr, priority)
+
+  # one row per predicted row_nbr, tagged with rule priority and weight
+  pred_df =
+    predict(rulelist, new_data) %>%
+    left_join(priority_df, by = "rule_nbr") %>%
+    mutate(weight = local(weight)) %>%
+    select(rule_nbr, row_nbr, weight, priority)
+
+  # actual outcome per row_nbr
+  new_data2 =
+    new_data %>%
+    mutate(row_nbr = seq_len(n())) %>%
+    select(all_of(c("row_nbr", y_name)))
+
+  # MAE over all rows predicted by rules with priority <= rn
+  mae_till_rule = function(rn){
+
+    if (is.character(rulelist$RHS)) {
+      # RHS holds expressions stored as text: evaluate each one against
+      # the matching row of new_data to obtain a numeric prediction.
+      # NOTE(review): eval(parse()) runs arbitrary code; only use this
+      # with rulelists coming from a trusted source.
+      inter_df =
+        pred_df %>%
+        tidytable::filter(priority <= rn) %>%
+        left_join(mutate(new_data, row_nbr = seq_len(n())),
+                  by = "row_nbr"
+                  ) %>%
+        left_join(select(rulelist, rule_nbr, RHS), by = "rule_nbr") %>%
+        nest(.by = c("RHS", "rule_nbr", "row_nbr", "priority", "weight")) %>%
+        mutate(RHS = purrr::map2_dbl(RHS,
+                                     data,
+                                     ~ eval(parse(text = .x), envir = .y)
+                                     )
+               ) %>%
+        unnest(data)
+    } else {
+
+      # RHS is already the predicted value
+      inter_df =
+        pred_df %>%
+        tidytable::filter(priority <= rn) %>%
+        left_join(new_data2, by = "row_nbr") %>%
+        left_join(select(rulelist, rule_nbr, RHS), by = "rule_nbr")
+    }
+
+    inter_df %>%
+      summarise(mae = MetricsWeighted::mae(RHS,
+                                           .data[[y_name]],
+                                           weight,
+                                           na.rm = TRUE
+                                           )
+                ) %>%
+      `[[`("mae")
+  }
+
+  purrr::map_dbl(seq_len(nrow(rulelist)), mae_till_rule)
+}
+
+calculate(tidy_rpart,
+          metrics_to_exclude = NULL,
+          list("cumulative_mae" = get_cumulative_MAE)
+          )
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/convert_rule_flavor.html b/docs/reference/convert_rule_flavor.html new file mode 100644 index 0000000..5530034 --- /dev/null +++ b/docs/reference/convert_rule_flavor.html @@ -0,0 +1,115 @@ + +Convert a R parsable rule to python/sql parsable rule — convert_rule_flavor • tidyrules + + +
+
+ + + +
+
+ + +
+

Convert a R parsable rule to python/sql parsable rule

+
+ +
+
convert_rule_flavor(rule, flavor)
+
+ +
+

Arguments

+
rule
+

(chr vector) R parsable rule(s)

+ + +
flavor
+

(string) One among: 'python', 'sql'

+ +
+
+

Value

+ + +

(chr vector) of rules

+
+
+

See also

+

rulelist, tidy, augment, predict, to_sql_case

+

Other Auxiliary Rulelist Utility: +to_sql_case()

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/figures/tidyrules_schematic.png b/docs/reference/figures/tidyrules_schematic.png new file mode 100644 index 0000000..35eaed1 Binary files /dev/null and b/docs/reference/figures/tidyrules_schematic.png differ diff --git a/docs/reference/index.html b/docs/reference/index.html index c6e3ede..b356b6d 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,66 +1,12 @@ - - - - - - - -Function reference • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • tidyrules - + + - - - -
-
- -
- -
+
- - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + +
-

All functions

+ - - - - - - - - - + + + + + + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - - + - - - -
+

All functions

-

addBackquotes()

+
+

as_rulelist()

+

as_rulelist generic from tidyrules package

+

as_rulelist(<data.frame>)

+

as_rulelist method for a data.frame

+

as_ruleset()

+

Get a ruleset from a rulelist

+

augment()

Add backquotes

-

package_tidyrules

+

augment is re-export of generics::augment from tidyrules package

+

augment(<rulelist>)

About 'tidyrules' package

-

positionSpaceOutsideSinglequotes()

+

Augment a rulelist

+

calculate()

Position of space outside single quotes

-

removeEmptyLines()

+

calculate is re-export of generics::calculate from tidyrules package

+

calculate(<rulelist>)

Remove empty lines

-

ruleRToPython()

+

calculate metrics for a rulelist

+

convert_rule_flavor()

Convert a R parsable rule to python parsable rule

-

ruleRToSQL()

+

Convert a R parsable rule to python/sql parsable rule

+

tidyrules tidyrules-package package_tidyrules

Convert a R parsable rule to SQL parsable rule

-

strHead()

+

tidyrules

+

plot(<prune_rulelist>)

Vectorized semantic equivalent of 'head' for a string

-

strReplaceReduce()

+

Plot method for prune_rulelist class

+

plot(<rulelist>)

Sequential string replace

-

strSplitSingle()

+

Plot method for rulelist

+

predict(<rulelist>)

String split a string

-

strTail()

+

predict method for a rulelist

+

predict(<ruleset>)

Vectorized semantic equivalent of tail for a string

-

tidyRules(<C5.0>)

+

predict method for a ruleset

+

print(<prune_rulelist>)

Obtain rules as a tidy tibble from a C5.0 model

-

tidyRules()

+

Print method for prune_rulelist class

+

print(<rulelist>)

Obtain rules as a tidy tibble

-

tidyRules(<cubist>)

+

Print method for rulelist class

+

print(<ruleset>)

Obtain rules as a tidy tibble from a cubist model

-

tidyRules(<rpart>)

+

Print method for ruleset class

+

prune()

Obtain rules as a tidy tibble from a rpart model

-

varSpec()

+

prune is re-export of generics::prune from tidyrules package

+

prune(<rulelist>)

Get variable specification for a Cubist/C5 object

- +

prune rules of a rulelist

+

reorder()

+

reorder generic

+

reorder(<rulelist>)

+

Reorder the rules/rows of a rulelist

+

rulelist

+

Rulelist

+

ruleset

+

Ruleset

+

set_keys()

+

Set keys for a rulelist

+

set_validation_data()

+

Add validation_data to a rulelist

+

tidy(<C5.0>)

+

Get the rulelist from a C5 model

+

tidy()

+

tidy is re-export of generics::tidy from tidyrules package

+

tidy(<constparty>)

+

Get the rulelist from a party model

+

tidy(<cubist>)

+

Get the rulelist from a cubist model

+

tidy(<rpart>)

+

Get the rulelist from a rpart model

+

to_sql_case()

+

Extract SQL case statement from a rulelist

+
-
- +
- - + + diff --git a/docs/reference/package_tidyrules.html b/docs/reference/package_tidyrules.html index 170a095..0534976 100644 --- a/docs/reference/package_tidyrules.html +++ b/docs/reference/package_tidyrules.html @@ -1,67 +1,19 @@ - - - - - - - -About 'tidyrules' package — package_tidyrules • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -tidyrules — package_tidyrules • tidyrules - - + + - - -
-
- -
- -
+
-

Obtain rules as tidy dataframes

+

tidyrules package provides a framework to work with decision +rules. Rules can be extracted from supported models using tidy, augmented +using validation data by augment, manipulated using +standard dataframe operations, (modified) rulelists can be used to +predict on unseen (test) data. Utilities include: +Create a rulelist manually (as_rulelist), Export +a rulelist to SQL (to_sql_case) and so on. The package offers two +classes; rulelist and ruleset based on dataframe.

- -

See also

- - +
+

See also

+ +
+
+

Author

+

Maintainer: Srikanth Komala Sheshachala sri.teach@gmail.com

+

Authors:

+
-
- +
- - + + diff --git a/docs/reference/plot.prune_rulelist.html b/docs/reference/plot.prune_rulelist.html new file mode 100644 index 0000000..1fa8ac0 --- /dev/null +++ b/docs/reference/plot.prune_rulelist.html @@ -0,0 +1,110 @@ + +Plot method for prune_rulelist class — plot.prune_rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Plot method for prune_rulelist class

+
+ +
+
# S3 method for prune_rulelist
+plot(x, ...)
+
+ +
+

Arguments

+
x
+

A 'prune_rulelist' object

+ + +
...
+

unused

+ +
+
+

Value

+ + +

ggplot2 object (invisibly)

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/plot.rulelist-1.png b/docs/reference/plot.rulelist-1.png new file mode 100644 index 0000000..0e6b89c Binary files /dev/null and b/docs/reference/plot.rulelist-1.png differ diff --git a/docs/reference/plot.rulelist.html b/docs/reference/plot.rulelist.html new file mode 100644 index 0000000..edd7ef0 --- /dev/null +++ b/docs/reference/plot.rulelist.html @@ -0,0 +1,138 @@ + +Plot method for rulelist — plot.rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Plots a heatmap with rule_nbr's on x-side and clusters of +row_nbr's on y-side of a binary matrix with 1 if a rule is applicable for +a row.

+
+ +
+
# S3 method for rulelist
+plot(x, thres_cluster_rows = 1000, dist_metric = "jaccard", ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
thres_cluster_rows
+

(positive integer) Maximum number of rows beyond +which an x-side dendrogram is not computed

+ + +
dist_metric
+

(string or function, default: "jaccard") Distance metric +for y-side (rule_nbr) passed to method argument of proxy::dist

+ + +
...
+

Arguments to be passed to pheatmap::pheatmap

+ +
+
+

Details

+

Number of clusters is set to min(number of unique rows in the +row_nbr X rule_nbr matrix and thres_cluster_rows)

+
+ +
+

Examples

+
library("magrittr")
+att = modeldata::attrition
+tidy_c5 =
+  C50::C5.0(Attrition ~., data = att, rules = TRUE) %>%
+  tidy() %>%
+  set_validation_data(att, "Attrition") %>%
+  set_keys(NULL)
+
+plot(tidy_c5)
+
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/positionSpaceOutsideSinglequotes.html b/docs/reference/positionSpaceOutsideSinglequotes.html index 3bf367f..20af037 100644 --- a/docs/reference/positionSpaceOutsideSinglequotes.html +++ b/docs/reference/positionSpaceOutsideSinglequotes.html @@ -1,68 +1,13 @@ - - - - - - - -Position of space outside single quotes — positionSpaceOutsideSinglequotes • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Position of space outside single quotes — positionSpaceOutsideSinglequotes • tidyrules - - + + - - -
-
- -
- -
+

(vectorised) Detect the position of space in a string not within - a pair of single quotes

+a pair of single quotes

-
positionSpaceOutsideSinglequotes(string)
- -

Arguments

- - - - - - -
string

A chracter vector

+
+
positionSpaceOutsideSinglequotes(string)
+
-

Value

+
+

Arguments

+
string
+

A character vector

-

A integer vector of positions

+
+
+

Value

+ -

Examples

-
# \donttest{ -tidyrules:::positionSpaceOutsideSinglequotes(c("hello", "hel' 'o "))
#> [[1]] -#> integer(0) -#> -#> [[2]] -#> [1] 8 -#>
# } +

An integer vector of positions

+
-
+
+

Examples

+
# \donttest{
+tidyrules:::positionSpaceOutsideSinglequotes(c("hello", "hel' 'o "))
+#> [[1]]
+#> integer(0)
+#> 
+#> [[2]]
+#> [1] 8
+#> 
+# }
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/predict.rulelist.html b/docs/reference/predict.rulelist.html new file mode 100644 index 0000000..14354a8 --- /dev/null +++ b/docs/reference/predict.rulelist.html @@ -0,0 +1,186 @@ + +predict method for a rulelist — predict.rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Predicts rule_nbr applicable (as per the order in rulelist) +for a row_nbr (per key) in new_data

+
+ +
+
# S3 method for rulelist
+predict(object, new_data, multiple = FALSE, ...)
+
+ +
+

Arguments

+
object
+

A rulelist

+ + +
new_data
+

(dataframe)

+ + +
multiple
+

(flag, default: FALSE) Whether to output all rule numbers +applicable for a row. If FALSE, the first satisfying rule is provided.

+ + +
...
+

unused

+ +
+
+

Value

+ + +

A dataframe. See Details.

+
+
+

Details

+

If a row_nbr is covered by more than one rule_nbr per 'keys', then +rule_nbr appearing earlier (as in row order of the rulelist) takes +precedence.

+

Output Format

+ +
  • When multiple is FALSE (default), output is a dataframe with three +or more columns: row_number (int), columns corresponding to 'keys', +rule_nbr (int).

  • +
  • When multiple is TRUE, output is a dataframe with three +or more columns: row_number (int), columns corresponding to 'keys', +rule_nbr (list column of integers).

  • +
  • If a row number and 'keys' combination is not covered by any rule, then +rule_nbr column has missing value.

  • +
+ +
+
+

See also

+ +
+ +
+

Examples

+
model_c5 = C50::C5.0(species ~.,
+                     data = palmerpenguins::penguins,
+                     trials = 5,
+                     rules = TRUE
+                     )
+tidy_c5 = tidy(model_c5)
+tidy_c5
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 5
+#> ▶ Number of rules: 26
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( island == 'Biscoe' ) & (… Gent…     122      0.992   2.8
+#>  2        2         1 ( bill_length_mm <= 43.3 )… Adel…      68      0.986   2.2
+#>  3        3         1 ( island == 'Dream' ) & ( … Chin…      48      0.98    5  
+#>  4        4         1 ( bill_length_mm > 42.3 ) … Chin…      34      0.944   4.8
+#>  5        5         1 ( flipper_length_mm <= 206… Adel…     213      0.698   1.6
+#>  6        1         2 ( bill_length_mm <= 40.8 )  Adel…      86      0.989   3  
+#>  7        2         2 ( island == 'Torgersen' )   Adel…      39      0.976   2.9
+#>  8        3         2 ( island == 'Biscoe' ) & (… Adel…      32      0.971   2.9
+#>  9        4         2 ( island == 'Dream' ) & ( … Chin…      87      0.910   3.9
+#> 10        5         2 ( island == 'Biscoe' )      Gent…     183      0.816   1.9
+#> # ℹ 16 more rows
+#> ----------------------------------------------
+
+output_1 = predict(tidy_c5, palmerpenguins::penguins)
+output_1 # different rules per 'keys' (`trial_nbr` here)
+
+output_2 = predict(tidy_c5, palmerpenguins::penguins, multiple = TRUE)
+output_2 # `rule_nbr` is a list-column of integer vectors
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/predict.ruleset.html b/docs/reference/predict.ruleset.html new file mode 100644 index 0000000..32d9eca --- /dev/null +++ b/docs/reference/predict.ruleset.html @@ -0,0 +1,162 @@ + +predict method for a ruleset — predict.ruleset • tidyrules + + +
+
+ + + +
+
+ + +
+

Predicts multiple rule_nbr(s) applicable for a row_nbr (per +key) in new_data

+
+ +
+
# S3 method for ruleset
+predict(object, new_data, ...)
+
+ +
+

Arguments

+
object
+

A ruleset

+ + +
new_data
+

(dataframe)

+ + +
...
+

unused

+ +
+
+

Value

+ + +

A dataframe with three or more columns: row_number (int), columns +corresponding to 'keys', rule_nbr (list column of integers). If a row +number and 'keys' combination is not covered by any rule, then rule_nbr

+ + +

column has missing value.

+
+
+

See also

+ +
+ +
+

Examples

+
model_c5 = C50::C5.0(species ~.,
+                     data = palmerpenguins::penguins,
+                     trials = 5,
+                     rules = TRUE
+                     )
+tidy_c5_ruleset = as_ruleset(tidy(model_c5))
+tidy_c5_ruleset
+#> ---- Ruleset -------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 5
+#> ▶ Number of rules: 26
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( island == 'Biscoe' ) & (… Gent…     122      0.992   2.8
+#>  2        2         1 ( bill_length_mm <= 43.3 )… Adel…      68      0.986   2.2
+#>  3        3         1 ( island == 'Dream' ) & ( … Chin…      48      0.98    5  
+#>  4        4         1 ( bill_length_mm > 42.3 ) … Chin…      34      0.944   4.8
+#>  5        5         1 ( flipper_length_mm <= 206… Adel…     213      0.698   1.6
+#>  6        1         2 ( bill_length_mm <= 40.8 )  Adel…      86      0.989   3  
+#>  7        2         2 ( island == 'Torgersen' )   Adel…      39      0.976   2.9
+#>  8        3         2 ( island == 'Biscoe' ) & (… Adel…      32      0.971   2.9
+#>  9        4         2 ( island == 'Dream' ) & ( … Chin…      87      0.910   3.9
+#> 10        5         2 ( island == 'Biscoe' )      Gent…     183      0.816   1.9
+#> # ℹ 16 more rows
+#> --------------------------------------------
+
+predict(tidy_c5_ruleset, palmerpenguins::penguins)
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/predict_all_nokeys_rulelist.html b/docs/reference/predict_all_nokeys_rulelist.html new file mode 100644 index 0000000..fc3d510 --- /dev/null +++ b/docs/reference/predict_all_nokeys_rulelist.html @@ -0,0 +1,109 @@ + +as the name says — predict_all_nokeys_rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

as the name says

+
+ +
+
predict_all_nokeys_rulelist(rulelist, new_data)
+
+ +
+

Arguments

+
rulelist
+

rulelist

+ + +
new_data
+

new_data

+ +
+
+

Value

+ + +

dataframe

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/predict_all_rulelist.html b/docs/reference/predict_all_rulelist.html new file mode 100644 index 0000000..63dfb68 --- /dev/null +++ b/docs/reference/predict_all_rulelist.html @@ -0,0 +1,109 @@ + +with or without keys — predict_all_rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

uses predict_all_nokeys_rulelist

+
+ +
+
predict_all_rulelist(rulelist, new_data)
+
+ +
+

Arguments

+
rulelist
+

rulelist

+ + +
new_data
+

new_data

+ +
+
+

Value

+ + +

dataframe

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/predict_nokeys_rulelist.html b/docs/reference/predict_nokeys_rulelist.html new file mode 100644 index 0000000..1e90466 --- /dev/null +++ b/docs/reference/predict_nokeys_rulelist.html @@ -0,0 +1,109 @@ + +as the name says — predict_nokeys_rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

as the name says

+
+ +
+
predict_nokeys_rulelist(rulelist, new_data)
+
+ +
+

Arguments

+
rulelist
+

rulelist

+ + +
new_data
+

new_data

+ +
+
+

Value

+ + +

dataframe

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/predict_rulelist.html b/docs/reference/predict_rulelist.html new file mode 100644 index 0000000..4964c24 --- /dev/null +++ b/docs/reference/predict_rulelist.html @@ -0,0 +1,109 @@ + +with or without keys — predict_rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

uses predict_nokeys_rulelist

+
+ +
+
predict_rulelist(rulelist, new_data)
+
+ +
+

Arguments

+
rulelist
+

rulelist

+ + +
new_data
+

new_data

+ +
+
+

Value

+ + +

dataframe

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/print.prune_rulelist.html b/docs/reference/print.prune_rulelist.html new file mode 100644 index 0000000..61dd30c --- /dev/null +++ b/docs/reference/print.prune_rulelist.html @@ -0,0 +1,104 @@ + +Print method for prune_rulelist class — print.prune_rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Print method for prune_rulelist class

+
+ +
+
# S3 method for prune_rulelist
+print(x, ...)
+
+ +
+

Arguments

+
x
+

A 'prune_rulelist' object

+ + +
...
+

unused

+ +
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/print.rulelist.html b/docs/reference/print.rulelist.html new file mode 100644 index 0000000..8ac090c --- /dev/null +++ b/docs/reference/print.rulelist.html @@ -0,0 +1,120 @@ + +Print method for rulelist class — print.rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Prints rulelist attributes and first few rows.

+
+ +
+
# S3 method for rulelist
+print(x, banner = TRUE, ...)
+
+ +
+

Arguments

+
x
+

A rulelist object

+ + +
banner
+

(flag, default: TRUE) Should the banner be displayed

+ + +
...
+

Passed to tidytable::print

+ +
+
+

Value

+ + +

input rulelist (invisibly)

+
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/print.ruleset.html b/docs/reference/print.ruleset.html new file mode 100644 index 0000000..91059c7 --- /dev/null +++ b/docs/reference/print.ruleset.html @@ -0,0 +1,144 @@ + +Print method for ruleset class — print.ruleset • tidyrules + + +
+
+ + + +
+
+ + +
+

Prints the ruleset object

+
+ +
+
# S3 method for ruleset
+print(x, banner = TRUE, ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
banner
+

(flag, default: TRUE) Should the banner be displayed

+ + +
...
+

Passed to print.rulelist

+ +
+
+

Value

+ + +

(invisibly) Returns the ruleset object

+
+
+

See also

+ +
+ +
+

Examples

+
model_class_party = partykit::ctree(species ~ .,
+                                    data = palmerpenguins::penguins
+                                    )
+as_ruleset(tidy(model_class_party))
+#> ---- Ruleset -------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 6
+#> ▶ Model type: constparty
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr LHS                   RHS   support confidence  lift terminal_node_id
+#>      <int> <chr>                 <fct>   <dbl>      <dbl> <dbl> <chr>           
+#> 1        1 ( island %in% c('Bis… Gent…     122      1      2.77 6               
+#> 2        2 ( island %in% c('Dre… Adel…      99      0.990  2.24 9               
+#> 3        3 ( island %in% c('Bis… Adel…      39      0.974  2.21 4               
+#> 4        4 ( island %in% c('Dre… Chin…      65      0.954  4.83 11              
+#> 5        5 ( island %in% c('Bis… Adel…       7      0.857  1.94 5               
+#> 6        6 ( island %in% c('Dre… Adel…      12      0.583  1.32 10              
+#> --------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/prune.html b/docs/reference/prune.html new file mode 100644 index 0000000..7db5205 --- /dev/null +++ b/docs/reference/prune.html @@ -0,0 +1,109 @@ + +prune is re-export of generics::prune from tidyrules package — prune • tidyrules + + +
+
+ + + +
+
+ + +
+

See prune.rulelist

+
+ +
+
prune(tree, ...)
+
+ +
+

Arguments

+
tree
+

A rulelist

+ + +
...
+

See prune.rulelist

+ +
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/prune.rulelist-1.png b/docs/reference/prune.rulelist-1.png new file mode 100644 index 0000000..c4f36e2 Binary files /dev/null and b/docs/reference/prune.rulelist-1.png differ diff --git a/docs/reference/prune.rulelist-2.png b/docs/reference/prune.rulelist-2.png new file mode 100644 index 0000000..b51f24c Binary files /dev/null and b/docs/reference/prune.rulelist-2.png differ diff --git a/docs/reference/prune.rulelist-3.png b/docs/reference/prune.rulelist-3.png new file mode 100644 index 0000000..e81346c Binary files /dev/null and b/docs/reference/prune.rulelist-3.png differ diff --git a/docs/reference/prune.rulelist.html b/docs/reference/prune.rulelist.html new file mode 100644 index 0000000..821f603 --- /dev/null +++ b/docs/reference/prune.rulelist.html @@ -0,0 +1,260 @@ + +prune rules of a rulelist — prune.rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Prune the rulelist by suggesting to keep first 'k' rules based +on metrics computed by calculate

+
+ +
+
# S3 method for rulelist
+prune(
+  tree,
+  metrics_to_exclude = NULL,
+  stop_expr_string = "relative__cumulative_coverage >= 0.9",
+  min_n_rules = 1,
+  ...
+)
+
+ +
+

Arguments

+
tree
+

A rulelist

+ + +
metrics_to_exclude
+

(character vector or NULL) Names of metrics not to +be calculated. See calculate for the list of default +metrics.

+ + +
stop_expr_string
+

(string default: "relative__cumulative_coverage >= +0.9") Parsable condition

+ + +
min_n_rules
+

(positive integer) Minimum number of rules to keep

+ + +
...
+

Named list of custom metrics passed to +calculate

+ +
+
+

Value

+ + +

Object of class 'prune_rulelist' with these components: 1. pruned: +rulelist keeping only first 'pos' rows. 2. n_pruned_rules: pos. If stop +criteria is never met, then pos = nrow(rulelist) 3. n_total_rules: +nrow(rulelist), 4. metrics_df: Dataframe with metrics and relative metrics +5. stop_expr_string

+
+
+

Details

+ +
  1. Metrics are computed using calculate. 2. +Relative metrics (prepended by 'relative__') are calculated by dividing +each metric by its max value. 3. The first rule in rulelist order which +meets the 'stop_expr_string' criteria is stored (say 'pos'). Print method +suggests to keep rules until pos.

  2. +
+
+

See also

+ +
+ +
+

Examples

+
library("magrittr")
+model_c5  = C50::C5.0(Attrition ~., data = modeldata::attrition, rules = TRUE)
+tidy_c5   = tidy(model_c5) %>%
+            set_validation_data(modeldata::attrition, "Attrition") %>%
+            set_keys(NULL)
+
+#' prune with defaults
+prune_obj = prune(tidy_c5)
+#' note that all other metrics are visible in the print output
+prune_obj
+#> ── Prune Suggestion ────────────────────────────────────────────────────────────
+#> 
+#>  Keep first 10 out of 24
+#> 
+#>  Metrics after 10 rules: 
+#> 
+#>  Run `plot(x)` for details; `x$pruned` to get pruned rulelist
+#> ────────────────────────────────────────────────────────────────────────────────
+plot(prune_obj)
+
+prune_obj$pruned
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 10
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> ----------------------------------------------
+
+#' prune with a different stop_expr_string threshold
+prune_obj = prune(tidy_c5,
+                  stop_expr_string = "relative__cumulative_coverage >= 0.2"
+                  )
+prune_obj #' as expected, has fewer than 10 rules as compared to default args
+#> ── Prune Suggestion ────────────────────────────────────────────────────────────
+#> 
+#>  Keep first 2 out of 24
+#> 
+#>  Metrics after 2 rules: 
+#> 
+#>  Run `plot(x)` for details; `x$pruned` to get pruned rulelist
+#> ────────────────────────────────────────────────────────────────────────────────
+plot(prune_obj)
+
+prune_obj$pruned
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 2
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr trial_nbr LHS                          RHS   support confidence  lift
+#>      <int>     <int> <chr>                        <fct>   <int>      <dbl> <dbl>
+#> 1        1         1 ( JobLevel <= 1 ) & ( Month… Yes        16      0.944   5.9
+#> 2        2         1 ( EnvironmentSatisfaction %… No        521      0.941   1.1
+#> ----------------------------------------------
+
+#' prune with a different stop_expr_string metric
+st = "relative__cumulative_overlap <= 0.7 & relative__cumulative_overlap > 0"
+prune_obj = prune(tidy_c5, stop_expr_string = st)
+prune_obj #' as expected, has fewer than 10 rules as compared to default args
+#> ── Prune Suggestion ────────────────────────────────────────────────────────────
+#> 
+#>  Keep first 3 out of 24
+#> 
+#>  Metrics after 3 rules: 
+#> 
+#>  Run `plot(x)` for details; `x$pruned` to get pruned rulelist
+#> ────────────────────────────────────────────────────────────────────────────────
+plot(prune_obj)
+
+prune_obj$pruned
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 3
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr trial_nbr LHS                          RHS   support confidence  lift
+#>      <int>     <int> <chr>                        <fct>   <int>      <dbl> <dbl>
+#> 1        1         1 ( JobLevel <= 1 ) & ( Month… Yes        16      0.944   5.9
+#> 2        2         1 ( EnvironmentSatisfaction %… No        521      0.941   1.1
+#> 3        3         1 ( DailyRate <= 722 ) & ( Jo… Yes        13      0.933   5.8
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/removeEmptyLines.html b/docs/reference/removeEmptyLines.html index 9ac4304..5de8f50 100644 --- a/docs/reference/removeEmptyLines.html +++ b/docs/reference/removeEmptyLines.html @@ -1,67 +1,12 @@ - - - - - - - -Remove empty lines — removeEmptyLines • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Remove empty lines — removeEmptyLines • tidyrules - - + + - - -
-
- -
- -
+
@@ -132,51 +62,53 @@

Remove empty lines

Remove empty strings from a character vector

-
removeEmptyLines(strings)
- -

Arguments

- - - - - - -
strings

A character vector

+
+
removeEmptyLines(strings)
+
-

Value

+
+

Arguments

+
strings
+

A character vector

-

A character vector

+
+
+

Value

+ -

Examples

-
# \donttest{ -tidyrules:::removeEmptyLines(c("abc", "", "d"))
#> [1] "abc" "d"
# } +

A character vector

+
-
+
+

Examples

+
# \donttest{
+tidyrules:::removeEmptyLines(c("abc", "", "d"))
+#> [1] "abc" "d"  
+# }
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/reorder.html b/docs/reference/reorder.html new file mode 100644 index 0000000..67daafd --- /dev/null +++ b/docs/reference/reorder.html @@ -0,0 +1,109 @@ + +reorder generic — reorder • tidyrules + + +
+
+ + + +
+
+ + +
+

reorder generic for rulelist

+
+ +
+
reorder(x, ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
...
+

See reorder.rulelist

+ +
+ + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/reorder.rulelist.html b/docs/reference/reorder.rulelist.html new file mode 100644 index 0000000..e0c9efc --- /dev/null +++ b/docs/reference/reorder.rulelist.html @@ -0,0 +1,200 @@ + +Reorder the rules/rows of a rulelist — reorder.rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+

Implements a greedy strategy to add one rule at a time which +maximizes/minimizes a metric.

+
+ +
+
# S3 method for rulelist
+reorder(x, metric = "cumulative_coverage", minimize = FALSE, init = NULL, ...)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
metric
+

(character vector or named list) Name of metrics or a custom +function(s). See calculate. The 'n+1'th metric is +used when there is a match at 'nth' level, similar to base::order. If +there is a match at final level, row order of the rulelist comes into play.

+ + +
minimize
+

(logical vector) Whether to minimize. Either TRUE/FALSE or a +logical vector of same length as metric

+ + +
init
+

(positive integer) Initial number of rows after which reordering +should begin

+ + +
...
+

passed to calculate

+ +
+
+

See also

+ +
+ +
+

Examples

+
library("magrittr")
+att = modeldata::attrition
+tidy_c5 =
+  C50::C5.0(Attrition ~., data = att, rules = TRUE) %>%
+  tidy() %>%
+  set_validation_data(att, "Attrition") %>%
+  set_keys(NULL) %>%
+  head(5)
+
+# with defaults
+reorder(tidy_c5)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 5
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr trial_nbr LHS      RHS   support confidence  lift cumulative_coverage
+#>      <int>     <int> <chr>    <fct>   <int>      <dbl> <dbl>               <dbl>
+#> 1        3         1 ( Daily… Yes        13      0.933   5.8                  13
+#> 2        4         1 ( JobRo… No        195      0.924   1.1                 208
+#> 3        2         1 ( Envir… No        521      0.941   1.1                 645
+#> 4        1         1 ( JobLe… Yes        16      0.944   5.9                 656
+#> 5        5         1 ( Envir… Yes         9      0.909   5.6                 664
+#> ----------------------------------------------
+
+# use 'cumulative_overlap' to break ties (if any)
+reorder(tidy_c5, metric = c("cumulative_coverage", "cumulative_overlap"))
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 5
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr trial_nbr LHS      RHS   support confidence  lift cumulative_coverage
+#>      <int>     <int> <chr>    <fct>   <int>      <dbl> <dbl>               <dbl>
+#> 1        3         1 ( Daily… Yes        13      0.933   5.8                  13
+#> 2        4         1 ( JobRo… No        195      0.924   1.1                 208
+#> 3        2         1 ( Envir… No        521      0.941   1.1                 645
+#> 4        1         1 ( JobLe… Yes        16      0.944   5.9                 656
+#> 5        5         1 ( Envir… Yes         9      0.909   5.6                 664
+#> # ℹ 1 more variable: cumulative_overlap <dbl>
+#> ----------------------------------------------
+
+# reorder after 2 rules
+reorder(tidy_c5, init = 2)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 5
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>   rule_nbr trial_nbr LHS      RHS   support confidence  lift cumulative_coverage
+#>      <int>     <int> <chr>    <fct>   <int>      <dbl> <dbl>               <dbl>
+#> 1        1         1 ( JobLe… Yes        16      0.944   5.9                  16
+#> 2        2         1 ( Envir… No        521      0.941   1.1                 537
+#> 3        4         1 ( JobRo… No        195      0.924   1.1                 648
+#> 4        3         1 ( Daily… Yes        13      0.933   5.8                 656
+#> 5        5         1 ( Envir… Yes         9      0.909   5.6                 664
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/rulelist.html b/docs/reference/rulelist.html new file mode 100644 index 0000000..4e6829f --- /dev/null +++ b/docs/reference/rulelist.html @@ -0,0 +1,303 @@ + +Rulelist — rulelist • tidyrules + + +
+
+ + + +
+
+ + +
+ +
+

Structure

+ + +

A rulelist is an ordered list of rules stored as a dataframe. Each row +specifies a rule (LHS), expected outcome (RHS) and some other details.

+

It has these mandatory columns:

  • rule_nbr: (integer vector) Rule number

  • +
  • LHS: (character vector) A rule is a string that can be parsed using base::parse()

  • +
  • RHS: (character vector or a literal)

  • +
+ +
+

Example

+ + +

| rule_nbr|LHS                                                                  |RHS       | support| confidence|     lift|
+|--------:|:--------------------------------------------------------------------|:---------|-------:|----------:|--------:|
+|        1|( island %in% c('Biscoe') ) & ( flipper_length_mm > 203 )            |Gentoo    |     122|  1.0000000| 2.774193|
+|        2|( island %in% c('Biscoe') ) & ( flipper_length_mm <= 203 )           |Adelie    |      46|  0.9565217| 2.164760|
+|        3|( island %in% c('Dream', 'Torgersen') ) & ( bill_length_mm > 44.1 )  |Chinstrap |      65|  0.9538462| 4.825339|
+|        4|( island %in% c('Dream', 'Torgersen') ) & ( bill_length_mm <= 44.1 ) |Adelie    |     111|  0.9459459| 2.140825|

+
+ +
+

Create a rulelist

+ + +

A rulelist can be created using tidy() on some supported model fits +(run: utils::methods(tidy)). It can also be created manually from an +existing dataframe using as_rulelist.

+
+ +
+

Keys and attributes

+ + +

Columns identified as 'keys' along with rule_nbr form a unique +combination +-- a group of rules. For example, rule-based C5 model with multiple trials +creates rules per each trial_nbr. predict method understands 'keys', +thereby provides/predicts a rule number (for each row in new data / test +data) within the same trial_nbr.

+

A rulelist has these mandatory attributes:

  • estimation_type: One among regression, classification

    +

    A rulelist has these optional attributes:

  • +
  • keys: (character vector) Names of the columns that form a key.

  • +
  • model_type: (string) Name of the model

  • +
+

Set Validation data

+ + +

A few methods like augment, calculate, prune, reorder +require a few additional attributes which can be set using +set_validation_data.

+
+ +
+

Methods for rulelist

+ +
  1. Predict: Given a dataframe (possibly without a +dependent variable column aka 'test data'), predicts the first rule (as +ordered in the rulelist) per 'keys' that is applicable for each row. When +multiple = TRUE, returns all rules applicable for a row (per key).

  2. +
  3. Augment: Outputs summary statistics per rule over +validation data and returns a rulelist with a new dataframe-column.

  4. +
  5. Calculate: Computes metrics for a rulelist in a +cumulative manner such as cumulative_coverage, cumulative_overlap, +cumulative_accuracy.

  6. +
  7. Prune: Suggests pruning a rulelist such that some +expectations are met (based on metrics). Example: cumulative_coverage of 80% +can be met with the first few rules.

  8. +
  9. Reorder: Reorders a rulelist in order to maximize a +metric.

  10. +
+ +
+

Manipulating a rulelist

+ + +

Rulelists are essentially dataframes. Hence, any dataframe operations which +preferably preserve attributes will output a rulelist. as_rulelist and +as.data.frame will help in moving back and forth between rulelist and +dataframe worlds.

+
+ +
+

Utilities for a rulelist

+ +
  1. as_rulelist: Create a rulelist from a +dataframe with some mandatory columns.

  2. +
  3. set_keys: Set or Unset 'keys' of a rulelist.

  4. +
  5. to_sql_case: Outputs a SQL case statement for a rulelist.

  6. +
  7. convert_rule_flavor: Converts R-parsable rule strings to python/SQL +parsable rule strings.

  8. +
+ +
+ +
+ + +
+

See also

+ +
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/ruleset.html b/docs/reference/ruleset.html new file mode 100644 index 0000000..b78b54d --- /dev/null +++ b/docs/reference/ruleset.html @@ -0,0 +1,92 @@ + +Ruleset — ruleset • tidyrules + + +
+
+ + + +
+
+ + +
+

ruleset class is a piggyback class that inherits rulelist +class for convenience of print and predict methods.

+
+ + + +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/set_keys.html b/docs/reference/set_keys.html new file mode 100644 index 0000000..9dda998 --- /dev/null +++ b/docs/reference/set_keys.html @@ -0,0 +1,263 @@ + +Set keys for a rulelist — set_keys • tidyrules + + +
+
+ + + +
+
+ + +
+

'keys' are a set of column(s) which identify a group of rules in +a rulelist. Methods like predict, +augment produce output per key combination.

+
+ +
+
set_keys(x, keys, reset = FALSE)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
keys
+

(character vector or NULL)

+ + +
reset
+

(flag) Whether to reset the keys to sequential numbers starting +with 1 when keys is set to NULL

+ +
+
+

Value

+ + +

A rulelist object

+
+
+

Details

+

A new rulelist is returned with attr keys modified. The input +rulelist object is unaltered.

+
+
+

See also

+

rulelist, tidy, augment, +predict, calculate, +prune, reorder

+

Other Core Rulelist Utility: +set_validation_data()

+
+ +
+

Examples

+
model_c5 = C50::C5.0(Attrition ~., data = modeldata::attrition, rules = TRUE)
+tidy_c5 = tidy(model_c5)
+tidy_c5 # keys are: "trial_nbr"
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 24
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> # ℹ 14 more rows
+#> ----------------------------------------------
+
+tidy_c5[["rule_nbr"]] = 1:nrow(tidy_c5)
+new_tidy_c5 = set_keys(tidy_c5, NULL) # remove all keys
+new_tidy_c5
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 24
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> # ℹ 14 more rows
+#> ----------------------------------------------
+
+new_2_tidy_c5 = set_keys(new_tidy_c5, "trial_nbr") # set "trial_nbr" as key
+new_2_tidy_c5
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 24
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> # ℹ 14 more rows
+#> ----------------------------------------------
+
+# Note that `tidy_c5` and `new_tidy_c5` are not altered.
+tidy_c5
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 24
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> # ℹ 14 more rows
+#> ----------------------------------------------
+new_tidy_c5
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 24
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> # ℹ 14 more rows
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/set_validation_data.html b/docs/reference/set_validation_data.html new file mode 100644 index 0000000..01e3c09 --- /dev/null +++ b/docs/reference/set_validation_data.html @@ -0,0 +1,224 @@ + +Add validation_data to a rulelist — set_validation_data • tidyrules + + +
+
+ + + +
+
+ + +
+

Returns a rulelist with three new attributes set: +validation_data, y_name and weight. Methods such as +augment, calculate, +prune, reorder require this to be set.

+
+ +
+
set_validation_data(x, validation_data, y_name, weight = 1)
+
+ +
+

Arguments

+
x
+

A rulelist

+ + +
validation_data
+

(dataframe) Data to be used for computing some metrics. +It is expected to contain y_name column.

+ + +
y_name
+

(string) Name of the dependent variable column.

+ + +
weight
+

(non-negative numeric vector, default: 1) Weight per +observation/row of validation_data. This is expected to have same length +as the number of rows in validation_data. Only exception is when it is a +single positive number, which means that all rows have equal weight.

+ +
+
+

Value

+ + +

A rulelist with some extra attributes set.

+
+
+

See also

+

rulelist, tidy, augment, +predict, calculate, +prune, reorder

+

Other Core Rulelist Utility: +set_keys()

+
+ +
+

Examples

+
att = modeldata::attrition
+set.seed(100)
+index = sample(c(TRUE, FALSE), nrow(att), replace = TRUE)
+model_c5 = C50::C5.0(Attrition ~., data = att[index, ], rules = TRUE)
+
+tidy_c5 = tidy(model_c5)
+tidy_c5
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 23
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( Age > 30 ) & ( DistanceF… No         69      0.986   1.2
+#>  2        2         1 ( DistanceFromHome <= 12 )… No        149      0.960   1.1
+#>  3        3         1 ( Department == 'Research_… No        211      0.953   1.1
+#>  4        4         1 ( Age > 30 ) & ( DistanceF… No        249      0.948   1.1
+#>  5        5         1 ( JobInvolvement %in% c('M… No        353      0.944   1.1
+#>  6        6         1 ( OverTime == 'No' ) & ( S… No        263      0.943   1.1
+#>  7        7         1 ( Education %in% c('Master… No        101      0.942   1.1
+#>  8        8         1 ( OverTime == 'No' ) & ( R… No         95      0.938   1.1
+#>  9        9         1 ( BusinessTravel %in% c('N… No        352      0.915   1.1
+#> 10       10         1 ( Education %in% c('Below_… No        265      0.910   1.1
+#> # ℹ 13 more rows
+#> ----------------------------------------------
+
+tidy_c5_2 = set_validation_data(tidy_c5,
+                                validation_data = att[!index, ],
+                                y_name = "Attrition",
+                                weight = 1 # default
+                                )
+tidy_c5_2
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 23
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: TRUE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( Age > 30 ) & ( DistanceF… No         69      0.986   1.2
+#>  2        2         1 ( DistanceFromHome <= 12 )… No        149      0.960   1.1
+#>  3        3         1 ( Department == 'Research_… No        211      0.953   1.1
+#>  4        4         1 ( Age > 30 ) & ( DistanceF… No        249      0.948   1.1
+#>  5        5         1 ( JobInvolvement %in% c('M… No        353      0.944   1.1
+#>  6        6         1 ( OverTime == 'No' ) & ( S… No        263      0.943   1.1
+#>  7        7         1 ( Education %in% c('Master… No        101      0.942   1.1
+#>  8        8         1 ( OverTime == 'No' ) & ( R… No         95      0.938   1.1
+#>  9        9         1 ( BusinessTravel %in% c('N… No        352      0.915   1.1
+#> 10       10         1 ( Education %in% c('Below_… No        265      0.910   1.1
+#> # ℹ 13 more rows
+#> ----------------------------------------------
+tidy_c5 # not altered
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 23
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( Age > 30 ) & ( DistanceF… No         69      0.986   1.2
+#>  2        2         1 ( DistanceFromHome <= 12 )… No        149      0.960   1.1
+#>  3        3         1 ( Department == 'Research_… No        211      0.953   1.1
+#>  4        4         1 ( Age > 30 ) & ( DistanceF… No        249      0.948   1.1
+#>  5        5         1 ( JobInvolvement %in% c('M… No        353      0.944   1.1
+#>  6        6         1 ( OverTime == 'No' ) & ( S… No        263      0.943   1.1
+#>  7        7         1 ( Education %in% c('Master… No        101      0.942   1.1
+#>  8        8         1 ( OverTime == 'No' ) & ( R… No         95      0.938   1.1
+#>  9        9         1 ( BusinessTravel %in% c('N… No        352      0.915   1.1
+#> 10       10         1 ( Education %in% c('Below_… No        265      0.910   1.1
+#> # ℹ 13 more rows
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/strHead.html b/docs/reference/strHead.html index 87ae332..9ccc21a 100644 --- a/docs/reference/strHead.html +++ b/docs/reference/strHead.html @@ -1,67 +1,12 @@ - - - - - - - -Vectorized semantic equivalent of 'head' for a string — strHead • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Vectorized semantic equivalent of 'head' for a string — strHead • tidyrules + + - - - - -
-
- -
- -
+
@@ -132,58 +62,63 @@

Vectorized semantic equivalent of 'head' for a string

Picks the substring starting from the first character

-
strHead(string, n)
+
+
strHead(string, n)
+
-

Arguments

- - - - - - - - - - -
string

string

n

(integer) Number of characters

+
+

Arguments

+
string
+

string

-

Value

-

A string

-

Details

+
n
+

(integer) Number of characters

-

'n' can be in the interval [-len + 1, len] (both ends inclusive)

+
+
+

Value

+ -

Examples

-
# \donttest{ -tidyrules:::strHead(c("string", "string2"), 2)
#> [1] "st" "st"
tidyrules:::strHead(c("string", "string2"), -1)
#> [1] "strin" "string"
# } +

A string

+
+
+

Details

+

'n' can be in the interval [-len + 1, len] (both ends inclusive)

+
-
+
+

Examples

+
# \donttest{
+tidyrules:::strHead(c("string", "string2"), 2)
+#> [1] "st" "st"
+tidyrules:::strHead(c("string", "string2"), -1)
+#> [1] "strin"  "string"
+# }
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/strReplaceReduce.html b/docs/reference/strReplaceReduce.html index 23ccb5f..2dc463a 100644 --- a/docs/reference/strReplaceReduce.html +++ b/docs/reference/strReplaceReduce.html @@ -1,67 +1,12 @@ - - - - - - - -Sequential string replace — strReplaceReduce • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Sequential string replace — strReplaceReduce • tidyrules - - + + - - -
-
- -
- -
+
@@ -132,59 +62,61 @@

Sequential string replace

Sequential string replace via reduce

-
strReplaceReduce(string, pattern, replacement)
- -

Arguments

- - - - - - - - - - - - - - -
string

string

pattern

pattern

replacement

replacement

- -

Value

- -

character vector

- -

Examples

-
# \donttest{ -tidyrules:::strReplaceReduce("abcd", c("ab", "dc"), c("cd", "ab"))
#> [1] "cabd"
# } - -
+
+
strReplaceReduce(string, pattern, replacement)
+
+ +
+

Arguments

+
string
+

string

+ + +
pattern
+

pattern

+ + +
replacement
+

replacement

+ +
+
+

Value

+ + +

character vector

+
+ +
+

Examples

+
# \donttest{
+tidyrules:::strReplaceReduce("abcd", c("ab", "dc"), c("cd", "ab"))
+#> [1] "cabd"
+# }
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/strSplitSingle.html b/docs/reference/strSplitSingle.html index f108989..c1ee8f7 100644 --- a/docs/reference/strSplitSingle.html +++ b/docs/reference/strSplitSingle.html @@ -1,67 +1,12 @@ - - - - - - - -String split a string — strSplitSingle • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -String split a string — strSplitSingle • tidyrules - - + + - - -
-
- -
- -
+
@@ -132,55 +62,57 @@

String split a string

and return a character vector (not a list)

-
strSplitSingle(string, pattern)
+
+
strSplitSingle(string, pattern)
+
+ +
+

Arguments

+
string
+

A string

-

Arguments

- - - - - - - - - - -
string

A string

pattern

Passed as-is to 'stringr::str_split'

-

Value

+
pattern
+

Passed as-is to 'stringr::str_split'

-

A character vector

+
+
+

Value

+ -

Examples

-
# \donttest{ -tidyrules:::strSplitSingle("abc,d", ",")
#> [1] "abc" "d"
# } +

A character vector

+
-
+
+

Examples

+
# \donttest{
+tidyrules:::strSplitSingle("abc,d", ",")
+#> [1] "abc" "d"  
+# }
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/strTail.html b/docs/reference/strTail.html index 5174c2b..6ea9cb5 100644 --- a/docs/reference/strTail.html +++ b/docs/reference/strTail.html @@ -1,67 +1,12 @@ - - - - - - - -Vectorized semantic equivalent of tail for a string — strTail • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Vectorized semantic equivalent of tail for a string — strTail • tidyrules + + - - - - -
-
- -
- -
+
@@ -132,58 +62,63 @@

Vectorized semantic equivalent of tail for a string

Picks the substring ending at the last character

-
strTail(string, n)
+
+
strTail(string, n)
+
-

Arguments

- - - - - - - - - - -
string

string

n

(integer) Number of characters

+
+

Arguments

+
string
+

string

-

Value

-

A string

-

Details

+
n
+

(integer) Number of characters

-

'n' can be in the interval [-len + 1, len] (both ends inclusive)

+
+
+

Value

+ -

Examples

-
# \donttest{ -tidyrules:::strTail(c("string", "string2"), 2)
#> [1] "ng" "g2"
tidyrules:::strTail(c("string", "string2"), -1)
#> [1] "tring" "tring2"
# } +

A string

+
+
+

Details

+

'n' can be in the interval [-len + 1, len] (both ends inclusive)

+
-
+
+

Examples

+
# \donttest{
+tidyrules:::strTail(c("string", "string2"), 2)
+#> [1] "ng" "g2"
+tidyrules:::strTail(c("string", "string2"), -1)
+#> [1] "tring"  "tring2"
+# }
+
+
+
+
-
- +
- - + + diff --git a/docs/reference/tidy.C5.0.html b/docs/reference/tidy.C5.0.html new file mode 100644 index 0000000..3248c50 --- /dev/null +++ b/docs/reference/tidy.C5.0.html @@ -0,0 +1,161 @@ + +Get the rulelist from a C5 model — tidy.C5.0 • tidyrules + + +
+
+ + + +
+
+ + +
+

Each row corresponds to a rule per trial_nbr

+
+ +
+
# S3 method for C5.0
+tidy(x, ...)
+
+ +
+

Arguments

+
x
+

C50::C5.0 model fitted with rules = TRUE

+ + +
...
+

Other arguments (See details)

+ +
+
+

Value

+ + +

A rulelist object

+
+
+

Details

+ +
  • The output columns are: rule_nbr, trial_nbr, LHS, RHS, +support, confidence, lift.

  • +
  • Rules per trial_nbr are sorted in this order: desc(confidence), +desc(lift), desc(support).

  • +

Optional named arguments:

  • laplace (flag, default: TRUE) is supported. This +computes confidence with laplace correction as documented under 'Rulesets' +here: C5 doc.

  • +
+
+

See also

+

rulelist, tidy, augment, +predict, calculate, +prune, reorder

+

Other Core Tidy Utility: +tidy(), +tidy.cubist(), +tidy.rpart()

+
+ +
+

Examples

+
model_c5 = C50::C5.0(Attrition ~., data = modeldata::attrition, rules = TRUE)
+tidy(model_c5)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 24
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> # ℹ 14 more rows
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/tidy.constparty.html b/docs/reference/tidy.constparty.html new file mode 100644 index 0000000..6fb17de --- /dev/null +++ b/docs/reference/tidy.constparty.html @@ -0,0 +1,173 @@ + +Get the rulelist from a party model — tidy.constparty • tidyrules + + +
+
+ + + +
+
+ + +
+

Each row corresponds to a rule

+
+ +
+
# S3 method for constparty
+tidy(x, ...)
+
+ +
+

Arguments

+
x
+

partykit::party model typically built using partykit::ctree

+ + +
...
+

Other arguments (currently unused)

+ +
+
+

Value

+ + +

A rulelist object

+
+
+

Details

+

These types of party models are supported: +regression (y is numeric), classification (y is factor)

+

For party classification model:

  • Output columns are: rule_nbr, LHS, RHS, support, confidence, lift, terminal_node_id.

  • +
  • Rules are sorted in this order: desc(confidence), desc(lift), +desc(support).

  • +

For party regression model:

  • Output columns are: rule_nbr, LHS, RHS, support, IQR, RMSE, terminal_node_id.

  • +
  • Rules are sorted in this order: RMSE, desc(support).

  • +
+ + +
+

Examples

+
pen = palmerpenguins::penguins
+model_class_party = partykit::ctree(species ~ ., data = pen)
+tidy(model_class_party)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 6
+#> ▶ Model type: constparty
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr LHS                   RHS   support confidence  lift terminal_node_id
+#>      <int> <chr>                 <fct>   <dbl>      <dbl> <dbl> <chr>           
+#> 1        1 ( island %in% c('Bis… Gent…     123      1      2.77 6               
+#> 2        2 ( island %in% c('Bis… Adel…      38      1      2.26 4               
+#> 3        3 ( island %in% c('Dre… Adel…     100      0.99   2.24 9               
+#> 4        4 ( island %in% c('Dre… Chin…      64      0.969  4.90 11              
+#> 5        5 ( island %in% c('Bis… Adel…       7      0.857  1.94 5               
+#> 6        6 ( island %in% c('Dre… Adel…      12      0.583  1.32 10              
+#> ----------------------------------------------
+model_regr_party = partykit::ctree(bill_length_mm ~ ., data = pen)
+tidy(model_regr_party)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 9
+#> ▶ Model type: constparty
+#> ▶ Estimation type: regression
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr LHS                          RHS support   IQR  RMSE terminal_node_id
+#>      <int> <chr>                      <dbl>   <dbl> <dbl> <dbl> <chr>           
+#> 1        1 ( species %in% c('Chinstr…  48.3       8  1.03 0.884 15              
+#> 2        2 ( species %in% c('Adelie'…  39.4      30  2.18 1.38  5               
+#> 3        3 ( species %in% c('Chinstr…  50.2      28  1.28 1.88  16              
+#> 4        4 ( species %in% c('Chinstr…  47.4      19  2.80 1.97  13              
+#> 5        5 ( species %in% c('Adelie'…  37.2      78  2.80 2.00  3               
+#> 6        6 ( species %in% c('Chinstr…  45.5      77  3.1  2.08  9               
+#> 7        7 ( species %in% c('Chinstr…  51.3      43  1.95 2.31  17              
+#> 8        8 ( species %in% c('Adelie'…  41.1      44  2.47 2.48  6               
+#> 9        9 ( species %in% c('Chinstr…  47.7      17  4.1  3.27  10              
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/tidy.cubist.html b/docs/reference/tidy.cubist.html new file mode 100644 index 0000000..1f51b59 --- /dev/null +++ b/docs/reference/tidy.cubist.html @@ -0,0 +1,158 @@ + +Get the rulelist from a cubist model — tidy.cubist • tidyrules + + +
+
+ + + +
+
+ + +
+

Each row corresponds to a rule per committee

+
+ +
+
# S3 method for cubist
+tidy(x, ...)
+
+ +
+

Arguments

+
x
+

Cubist::cubist model

+ + +
...
+

Other arguments (currently unused)

+ +
+
+

Value

+ + +

A rulelist object

+
+
+

Details

+ +
  • The output columns are: rule_nbr, committee, LHS, RHS, support, mean, min, max, error.

  • +
  • Rules are sorted in this order per committee: +error, desc(support)

  • +
+
+

See also

+

rulelist, tidy, augment, +predict, calculate, +prune, reorder

+

Other Core Tidy Utility: +tidy(), +tidy.C5.0(), +tidy.rpart()

+
+ +
+

Examples

+
att = modeldata::attrition
+cols_att    = setdiff(colnames(att), c("MonthlyIncome", "Attrition"))
+model_cubist = Cubist::cubist(x = att[, cols_att],
+                              y = att[["MonthlyIncome"]]
+                              )
+tidy(model_cubist)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: committee
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 8
+#> ▶ Model type: cubist
+#> ▶ Estimation type: regression
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr committee LHS                  RHS   support   mean   min   max error
+#>      <int>     <int> <chr>                <chr>   <int>  <dbl> <dbl> <dbl> <dbl>
+#> 1        1         1 ( JobLevel > 1 ) & … (-20…      57  4459   2272  5301  328.
+#> 2        2         1 ( JobLevel > 4 )     (136…      69 19192. 18041 19999  416 
+#> 3        3         1 ( JobRole %in% c('M… (-13…      26 12857. 11031 17603  517.
+#> 4        4         1 ( JobLevel <= 1 )    (226…     543  2787.  1009  4968  559.
+#> 5        5         1 ( JobLevel <= 4 ) &… (416…      87 15824  12061 17924  694.
+#> 6        6         1 ( JobRole %in% c('H… (-11…     245  8469.  2592 13973  932.
+#> 7        7         1 ( JobLevel > 1 ) & … (185…     387  6261.  2176  9998  995.
+#> 8        8         1 ( JobLevel > 1 ) & … (-39…     124  4672.  2042  9724 1013.
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/tidy.html b/docs/reference/tidy.html new file mode 100644 index 0000000..bb1e2f0 --- /dev/null +++ b/docs/reference/tidy.html @@ -0,0 +1,115 @@ + +tidy is re-export of generics::tidy from tidyrules package — tidy • tidyrules + + +
+
+ + + +
+
+ + +
+

tidy applied on a supported model fit creates a rulelist. +See Also section links to documentation of specific methods.

+
+ +
+
tidy(x, ...)
+
+ +
+

Arguments

+
x
+

A supported model object

+ + +
...
+

For model specific implementations to use

+ +
+
+

See also

+

rulelist, tidy, augment, +predict, calculate, +prune, reorder

+

Other Core Tidy Utility: +tidy.C5.0(), +tidy.cubist(), +tidy.rpart()

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/tidy.rpart.html b/docs/reference/tidy.rpart.html new file mode 100644 index 0000000..fcd597d --- /dev/null +++ b/docs/reference/tidy.rpart.html @@ -0,0 +1,171 @@ + +Get the rulelist from a rpart model — tidy.rpart • tidyrules + + +
+
+ + + +
+
+ + +
+

Each row corresponds to a rule

+
+ +
+
# S3 method for rpart
+tidy(x, ...)
+
+ +
+

Arguments

+
x
+

rpart::rpart model

+ + +
...
+

Other arguments (currently unused)

+ +
+
+

Value

+ + +

A rulelist object

+
+
+

Details

+

For rpart rules, one should build the model without ordered factor variables. We recommend converting ordered factors to factor or integer class.

+

For rpart::rpart classification model:

  • Output columns are: rule_nbr, LHS, RHS, support, confidence, lift.

  • +
  • The rules are sorted in this order: desc(confidence), desc(lift), +desc(support).

  • +

For rpart::rpart regression(anova) model:

  • Output columns are: rule_nbr, LHS, RHS, support.

  • +
  • The rules are sorted in this order: desc(support).

  • +
+
+

See also

+

rulelist, tidy, augment, +predict, calculate, +prune, reorder

+

Other Core Tidy Utility: +tidy(), +tidy.C5.0(), +tidy.cubist()

+
+ +
+

Examples

+
model_class_rpart = rpart::rpart(Species ~ ., data = iris)
+tidy(model_class_rpart)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 3
+#> ▶ Model type: rpart
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr LHS                                    RHS   support confidence  lift
+#>      <int> <chr>                                  <fct>   <int>      <dbl> <dbl>
+#> 1        1 ( Petal.Length < 2.45 )                seto…      50      0.981  2.94
+#> 2        2 ( Petal.Length >= 2.45 ) & ( Petal.Wi… virg…      46      0.958  2.88
+#> 3        3 ( Petal.Length >= 2.45 ) & ( Petal.Wi… vers…      54      0.893  2.68
+#> ----------------------------------------------
+
+model_regr_rpart = rpart::rpart(Sepal.Length ~ ., data = iris)
+tidy(model_regr_rpart)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: NULL
+#> ▶ Number of rules: 7
+#> ▶ Model type: rpart
+#> ▶ Estimation type: regression
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>   rule_nbr LHS                                                       RHS support
+#>      <int> <chr>                                                   <dbl>   <int>
+#> 1        1 ( Petal.Length < 4.25 ) & ( Petal.Length < 3.4 ) & ( S…  5.17      33
+#> 2        2 ( Petal.Length >= 4.25 ) & ( Petal.Length < 6.05 ) & (…  6.05      33
+#> 3        3 ( Petal.Length >= 4.25 ) & ( Petal.Length < 6.05 ) & (…  6.60      25
+#> 4        4 ( Petal.Length < 4.25 ) & ( Petal.Length < 3.4 ) & ( S…  4.73      20
+#> 5        5 ( Petal.Length < 4.25 ) & ( Petal.Length >= 3.4 )        5.64      20
+#> 6        6 ( Petal.Length >= 4.25 ) & ( Petal.Length < 6.05 ) & (…  6.53      10
+#> 7        7 ( Petal.Length >= 4.25 ) & ( Petal.Length >= 6.05 )      7.58       9
+#> ----------------------------------------------
+
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/to_sql_case.html b/docs/reference/to_sql_case.html new file mode 100644 index 0000000..26ec0e1 --- /dev/null +++ b/docs/reference/to_sql_case.html @@ -0,0 +1,184 @@ + +Extract SQL case statement from a rulelist — to_sql_case • tidyrules + + +
+
+ + + +
+
+ + +
+

Extract SQL case statement from a rulelist

+
+ +
+
to_sql_case(rulelist, rhs_column_name = "RHS", output_colname = "output")
+
+ +
+

Arguments

+
rulelist
+

A rulelist object

+ + +
rhs_column_name
+

(string, default: "RHS") Name of the column in the +rulelist to be used as RHS (WHEN some_rule THEN rhs) in the sql case +statement

+ + +
output_colname
+

(string, default: "output") Name of the output column +created by the SQL statement (used in case ... AS output_column)

+ +
+
+

Value

+ + +

(string invisibly) SQL case statement

+
+
+

Details

+

As a side-effect, the SQL statement is printed to stdout using cat. The output +contains newline characters.

+
+
+

See also

+

rulelist, tidy, augment, predict, convert_rule_flavor

+

Other Auxiliary Rulelist Utility: +convert_rule_flavor()

+
+ +
+

Examples

+
model_c5 = C50::C5.0(Attrition ~., data = modeldata::attrition, rules = TRUE)
+tidy(model_c5)
+#> ---- Rulelist --------------------------------
+#> ▶ Keys: trial_nbr
+#> ▶ Number of distinct keys: 1
+#> ▶ Number of rules: 24
+#> ▶ Model type: C5
+#> ▶ Estimation type: classification
+#> ▶ Is validation data set: FALSE
+#> 
+#> 
+#>    rule_nbr trial_nbr LHS                         RHS   support confidence  lift
+#>       <int>     <int> <chr>                       <fct>   <int>      <dbl> <dbl>
+#>  1        1         1 ( JobLevel <= 1 ) & ( Mont… Yes        16      0.944   5.9
+#>  2        2         1 ( EnvironmentSatisfaction … No        521      0.941   1.1
+#>  3        3         1 ( DailyRate <= 722 ) & ( J… Yes        13      0.933   5.8
+#>  4        4         1 ( JobRole == 'Research_Sci… No        195      0.924   1.1
+#>  5        5         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  6        6         1 ( EnvironmentSatisfaction … Yes         9      0.909   5.6
+#>  7        7         1 ( JobRole %in% c('Laborato… Yes        14      0.875   5.4
+#>  8        8         1 ( JobRole == 'Laboratory_T… Yes         6      0.875   5.4
+#>  9        9         1 ( Department == 'Sales' ) … Yes        13      0.867   5.4
+#> 10       10         1 ( TotalWorkingYears > 2 )   No       1347      0.864   1  
+#> # ℹ 14 more rows
+#> ----------------------------------------------
+to_sql_case(tidy(model_c5))
+#> CASE
+#> WHEN ( JobLevel <= 1 ) AND ( MonthlyIncome <= 2468 ) AND ( OverTime = 'Yes' ) AND ( TotalWorkingYears > 2 ) AND ( YearsAtCompany <= 3 ) THEN 'Yes'
+#> WHEN ( EnvironmentSatisfaction IN ('Medium', 'High', 'Very_High') ) AND ( JobInvolvement IN ('Medium', 'High', 'Very_High') ) AND ( OverTime = 'No' ) AND ( TrainingTimesLastYear > 1 ) AND ( WorkLifeBalance IN ('Better', 'Best') ) THEN 'No'
+#> WHEN ( DailyRate <= 722 ) AND ( JobLevel <= 1 ) AND ( MonthlyIncome <= 2468 ) AND ( OverTime = 'Yes' ) AND ( TotalWorkingYears > 2 ) THEN 'Yes'
+#> WHEN ( JobRole = 'Research_Scientist' ) AND ( OverTime = 'No' ) THEN 'No'
+#> WHEN ( EnvironmentSatisfaction IN ('Low', 'Medium') ) AND ( MaritalStatus IN ('Divorced', 'Married') ) AND ( NumCompaniesWorked > 4 ) AND ( OverTime = 'Yes' ) AND ( PerformanceRating = 'Excellent' ) AND ( RelationshipSatisfaction IN ('Low', 'Medium', 'High') ) THEN 'Yes'
+#> WHEN ( EnvironmentSatisfaction IN ('Low', 'Medium') ) AND ( Gender = 'Male' ) AND ( MaritalStatus IN ('Divorced', 'Married') ) AND ( NumCompaniesWorked > 4 ) AND ( OverTime = 'Yes' ) AND ( PerformanceRating = 'Excellent' ) THEN 'Yes'
+#> WHEN ( JobRole IN ('Laboratory_Technician', 'Sales_Representative') ) AND ( MonthlyIncome <= 2657 ) AND ( TotalWorkingYears <= 2 ) AND ( WorkLifeBalance IN ('Bad', 'Good') ) THEN 'Yes'
+#> WHEN ( JobRole = 'Laboratory_Technician' ) AND ( MaritalStatus = 'Single' ) AND ( MonthlyIncome > 2468 ) AND ( OverTime = 'Yes' ) AND ( TrainingTimesLastYear <= 2 ) THEN 'Yes'
+#> WHEN ( Department = 'Sales' ) AND ( MaritalStatus = 'Single' ) AND ( OverTime = 'Yes' ) AND ( YearsSinceLastPromotion > 1 ) THEN 'Yes'
+#> WHEN ( TotalWorkingYears > 2 ) THEN 'No'
+#> WHEN ( Age <= 44 ) AND ( OverTime = 'No' ) AND ( TotalWorkingYears > 2 ) AND ( WorkLifeBalance = 'Bad' ) AND ( YearsWithCurrManager <= 0 ) THEN 'Yes'
+#> WHEN ( EducationField = 'Life_Sciences' ) AND ( EnvironmentSatisfaction IN ('Low', 'Medium') ) AND ( JobInvolvement = 'Low' ) AND ( MonthlyIncome > 2468 ) AND ( OverTime = 'Yes' ) THEN 'Yes'
+#> WHEN ( JobInvolvement = 'Low' ) AND ( OverTime = 'No' ) AND ( TotalWorkingYears <= 2 ) THEN 'Yes'
+#> WHEN ( Department = 'Sales' ) AND ( JobInvolvement IN ('Low', 'Medium') ) AND ( JobSatisfaction IN ('Medium', 'High') ) AND ( MaritalStatus = 'Single' ) AND ( OverTime = 'Yes' ) AND ( YearsSinceLastPromotion <= 1 ) THEN 'Yes'
+#> WHEN ( JobRole IN ('Laboratory_Technician', 'Sales_Representative') ) AND ( TotalWorkingYears <= 2 ) AND ( TrainingTimesLastYear <= 1 ) THEN 'Yes'
+#> WHEN ( JobSatisfaction = 'High' ) AND ( OverTime = 'No' ) AND ( WorkLifeBalance = 'Bad' ) AND ( YearsSinceLastPromotion > 6 ) THEN 'Yes'
+#> WHEN ( Department = 'Sales' ) AND ( JobInvolvement = 'Very_High' ) AND ( MaritalStatus = 'Single' ) AND ( OverTime = 'Yes' ) THEN 'Yes'
+#> WHEN ( EnvironmentSatisfaction = 'Low' ) AND ( JobRole = 'Laboratory_Technician' ) AND ( TotalWorkingYears <= 2 ) THEN 'Yes'
+#> WHEN ( JobRole = 'Human_Resources' ) AND ( TotalWorkingYears <= 2 ) THEN 'Yes'
+#> WHEN ( OverTime = 'Yes' ) AND ( TotalWorkingYears <= 2 ) THEN 'Yes'
+#> WHEN ( OverTime = 'No' ) AND ( TotalWorkingYears > 2 ) AND ( WorkLifeBalance = 'Bad' ) AND ( YearsWithCurrManager <= 0 ) THEN 'Yes'
+#> WHEN ( Department = 'Sales' ) AND ( MaritalStatus = 'Single' ) AND ( OverTime = 'Yes' ) THEN 'Yes'
+#> WHEN ( OverTime = 'No' ) AND ( WorkLifeBalance = 'Bad' ) AND ( YearsSinceLastPromotion > 6 ) THEN 'Yes'
+#> WHEN ( EnvironmentSatisfaction IN ('Low', 'Medium') ) AND ( MonthlyRate > 16620 ) AND ( OverTime = 'Yes' ) THEN 'Yes'
+#> ELSE NULL
+#> END AS output
+
+
+
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/varSpec.html b/docs/reference/varSpec.html index 2d81c98..80c5fb6 100644 --- a/docs/reference/varSpec.html +++ b/docs/reference/varSpec.html @@ -1,68 +1,13 @@ - - - - - - - -Get variable specification for a Cubist/C5 object — varSpec • tidyrules - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get variable specification for a Cubist/C5 object — varSpec • tidyrules + + - - - - -
-
- -
- -
+

Obtain variable names, type (numeric, ordered, factor) and - levels as a tibble

+levels as a tidytable

-
varSpec(object)
- -

Arguments

- - - - - - -
object

Cubist/C5 object

- -

Value

- -

A tibble with three columns: variable(character), type(character) and - levels(a list-column). For numeric variables, levels are set to NA.

- -

Examples

-
data("attrition", package = "modeldata") -attrition <- tibble::as_tibble(attrition) -cols_att <- setdiff(colnames(attrition), c("MonthlyIncome", "Attrition")) - -cb_att <- - Cubist::cubist(x = attrition[, cols_att],y = attrition[["MonthlyIncome"]]) -varSpec(cb_att)
#> # A tibble: 29 x 3 -#> type levels variable -#> <chr> <list> <chr> -#> 1 numeric <chr [1]> Age -#> 2 factor <chr [3]> BusinessTravel -#> 3 numeric <chr [1]> DailyRate -#> 4 factor <chr [3]> Department -#> 5 numeric <chr [1]> DistanceFromHome -#> 6 ordered <chr [5]> Education -#> 7 factor <chr [6]> EducationField -#> 8 ordered <chr [4]> EnvironmentSatisfaction -#> 9 factor <chr [2]> Gender -#> 10 numeric <chr [1]> HourlyRate -#> # … with 19 more rows
+
+
varSpec(object)
+
+ +
+

Arguments

+
object
+

Cubist/C5 object

+ +
+
+

Value

+ + +

A tidytable with three columns: variable(character), type(character) +and levels(a list-column). For numeric variables, levels are set to NA.

+
+ +
+

Examples

+
if (FALSE) {
+data("attrition", package = "modeldata")
+cols_att = setdiff(colnames(attrition), c("MonthlyIncome", "Attrition"))
+
+cb_att = Cubist::cubist(x = attrition[, cols_att],
+                        y = attrition[["MonthlyIncome"]]
+                        )
+varSpec(cb_att)
+}
+
+
+
-
- +
- - + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 0000000..96176e9 --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,183 @@ + + + + /404.html + + + /articles/index.html + + + /articles/tidyrules_vignette.html + + + /articles/using_tidyrules.html + + + /authors.html + + + /index.html + + + /news/index.html + + + /reference/addBackquotes.html + + + /reference/as_rulelist.data.frame.html + + + /reference/as_rulelist.html + + + /reference/as_ruleset.html + + + /reference/augment.html + + + /reference/augment.rulelist.html + + + /reference/augment_class_keys.html + + + /reference/augment_class_no_keys.html + + + /reference/augment_regr_keys.html + + + /reference/augment_regr_no_keys.html + + + /reference/calculate.html + + + /reference/calculate.rulelist.html + + + /reference/convert_rule_flavor.html + + + /reference/index.html + + + /reference/package_tidyrules.html + + + /reference/plot.prune_rulelist.html + + + /reference/plot.rulelist.html + + + /reference/positionSpaceOutsideSinglequotes.html + + + /reference/predict.rulelist.html + + + /reference/predict.ruleset.html + + + /reference/predict_all_nokeys_rulelist.html + + + /reference/predict_all_rulelist.html + + + /reference/predict_nokeys_rulelist.html + + + /reference/predict_rulelist.html + + + /reference/print.prune_rulelist.html + + + /reference/print.rulelist.html + + + /reference/print.ruleset.html + + + /reference/prune.html + + + /reference/prune.rulelist.html + + + /reference/removeEmptyLines.html + + + /reference/reorder.html + + + /reference/reorder.rulelist.html + + + /reference/ruleRToPython.html + + + /reference/ruleRToSQL.html + + + /reference/rulelist.html + + + /reference/ruleset.html + + + /reference/set_keys.html + + + /reference/set_validation_data.html + + + /reference/strHead.html + + + /reference/strReplaceReduce.html + + + /reference/strSplitSingle.html + + + /reference/strTail.html + + + /reference/tidy.C5.0.html + + + /reference/tidy.constparty.html + + + 
/reference/tidy.cubist.html + + + /reference/tidy.html + + + /reference/tidy.rpart.html + + + /reference/tidyRules.C5.0.html + + + /reference/tidyRules.cubist.html + + + /reference/tidyRules.html + + + /reference/tidyRules.rpart.html + + + /reference/to_sql_case.html + + + /reference/varSpec.html + + diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000..514c005 --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,18 @@ +citHeader("To cite tidyrules in publications use:") + +citEntry( + entry = "Manual", + title = "tidyrules", + author = "Srikanth Komala Sheshachala, Amith Kumar Ullur Raghavendra", + year = "2024", + url = "https://CRAN.R-project.org/package=tidyrules", + textVersion = paste( + "Srikanth Komala Sheshachala, Amith Kumar Ullur Raghavendra", + "(2024).", + "tidyrules", + "Utilities to Retrieve Rulelists from Model Fits, Filter, Prune, Reorder and Predict on unseen data.", + "R package version 4.2.0.", + "https://CRAN.R-project.org/package=tidyrules" + ), + doi = "10.32614/CRAN.package.tidyrules" +) diff --git a/man/.DS_Store b/man/.DS_Store index 5008ddf..4a33a6d 100644 Binary files a/man/.DS_Store and b/man/.DS_Store differ diff --git a/man/figures/tidyrules_schematic.png b/man/figures/tidyrules_schematic.png new file mode 100644 index 0000000..35eaed1 Binary files /dev/null and b/man/figures/tidyrules_schematic.png differ diff --git a/vignettes/.DS_Store b/vignettes/.DS_Store index 5008ddf..b637205 100644 Binary files a/vignettes/.DS_Store and b/vignettes/.DS_Store differ diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/articles/.DS_Store b/vignettes/articles/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/vignettes/articles/.DS_Store differ diff --git a/vignettes/articles/tidyrules_schematic.png b/vignettes/articles/tidyrules_schematic.png new file mode 
100644 index 0000000..35eaed1 Binary files /dev/null and b/vignettes/articles/tidyrules_schematic.png differ diff --git a/vignettes/articles/using_tidyrules.Rmd b/vignettes/articles/using_tidyrules.Rmd new file mode 100644 index 0000000..adf5d59 --- /dev/null +++ b/vignettes/articles/using_tidyrules.Rmd @@ -0,0 +1,326 @@ +--- +title: "Using tidyrules" +author: "Srikanth KS, Amith Kumar UR" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: true + collapsed: false + smooth_scroll: false + highlight: zenburn + theme: readable + self_contained: yes + mode: selfcontained +vignette: > + %\VignetteIndexEntry{Using tidyrules} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + +## Abstract + +> [tidyrules](https://cran.r-project.org/package=tidyrules) [R](https://www.r-project.org/) [package](https://cran.r-project.org/) provides a framework to work with decision rules. Rules can be extracted from supported models, augmented with (custom) metrics using validation data, manipulated using standard dataframe operations, reordered and pruned based on a metric, predict on unseen (test) data. Utilities include; Creating a rulelist manually, Exporting a rulelist as a SQL case statement and so on. The package offers two classes; rulelist and ruleset based on dataframe. + +## Schematic +![](tidyrules_schematic.png) + +```{r, include = FALSE} +library("dplyr") +devtools::load_all() +``` + +## About + +This document provides a working example of a classification problem where the functionality of the package is showcased. We use `modeldata::attrition` dataset where `Attrition` column is the binary dependent variable.
+ + +```{r, eval = FALSE} +library("tidyrules") +``` + +```{r} +att = modeldata::attrition +set.seed(1) +valid_index = sample(c(TRUE, FALSE), nrow(att), replace = TRUE) +att_train = att[!valid_index, ] # nrow: 742 +att_valid = att[valid_index, ] # nrow: 728 +glimpse(att) +``` + +## Tidy + +`tidy` generic creates `rulelist` from a supported model fit. `rulelist` class is the fundamental data structure which offers many methods such as `predict`, `augment` and so on. A `rulelist` is a dataframe with some extra attributes. The order of rows of the dataframe defines the order of preference of rules. + +`tidy` supports these model fits: + +- `C5` rule-based model (classification) +- `rpart` tree (classification / regression) +- `party` tree (classification / regression) +- `cubist` tree (regression) + +Let's build a C5 model and then extract a rulelist: + +```{r} +model_c5 = C50::C5.0(Attrition ~., data = att_train, rules = TRUE) +model_c5 +``` + +```{r, collapse = TRUE} +tidy_c5 = tidy(model_c5) +tidy_c5 +``` + +## Rulelist + +A rulelist is expected to have these mandatory columns: + +- `rule_nbr`: Something that identifies a rule uniquely per `keys`. Typically, an integer vector starting from 1. +- `LHS`: A character vector of R-parsable strings +- `RHS`: factor (for classification), numeric (for regression) or character vector of R-parsable strings (to be evaluated) + +`trial_nbr` is a key. `C5` model builds multiple boosting iterations indexed by `trial_nbr` (default is set to 1). `rule_nbr`'s start from 1 for each `trial_nbr`. In general, `keys` columns along with `rule_nbr` column should be unique. + +Attribute `estimation_type` is central to further methods where metrics get computed. At this moment, the package supports these: `classification`, `regression`. + +The rulelist (obtained from `C5` model) is ordered by `confidence` column, by default. + +A rulelist can be either created using `tidy` on a supported model or from a dataframe using `as_rulelist`.
+ +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` rulelist is simply a dataframe with some attributes. Manipulate them with standard dataframe operations (`dplyr`, `data.table` ...). `tibble::as_tibble` or `as.data.frame` will convert to a tibble/dataframe (with attributes). `as_rulelist` can be used to convert to a rulelist. + +## Predict + +The mainstay of package is the `predict` method of the rulelist class. `predict` provides the first rule (in the order as per the rulelist) that is applicable for an observation/row in the test data. If a row is not *covered* by any rule, then `rule_nbr` is missing. + +```{r, eval = FALSE} +predict(tidy_c5, att_valid) +``` + +```{r, echo = FALSE} +tibble::as_tibble(predict(tidy_c5, att_valid)) +``` + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` To know all rules applicable for a row, use argument `multiple = TRUE`. Alternately, `predict` on a ruleset always yields all rules applicable per row. + +```{r, eval = FALSE} +predict(tidy_c5, att_valid, multiple = TRUE) +``` + +```{r, echo = FALSE} +tibble::as_tibble(predict(tidy_c5, att_valid, multiple = TRUE)) +``` + +## Setters + +- `set_validation_data`: Setting (or removing) validation data adds a validation data to a rulelist which gets used for `augment`, `calculate` and other methods. + +- `set_keys`: Sets (or removes) keys. + +```{r} +tidy_c5 = + tidy_c5 %>% + set_validation_data(att_valid, y_name = "Attrition", weight = 1) %>% + set_keys(NULL) + +tidy_c5 +``` + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` Setting weight argument (other than 1 which means equal weight) leads to calculating weighted metrics. + +## Augment + +`augment` adds metrics related to validation data in a new column 'augmented_stats'.
+ +```{r} +tidy_c5 %>% + augment() %>% + tibble::as_tibble() %>% + tidytable::unnest(names_sep = "__") %>% + glimpse() +``` + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` If augmented metrics differ from train data metrics, then it could indicate drift in the data! + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` `augment` also supports custom metrics in `dplyr::summarise` syntax! + +## Plot + +Plotting a rulelist as a heatmap helps in understanding these things: + +- Cluster of rows which are *covered* by same set of rules (with hclust) +- Cluster rules based on the common rows they *cover* (with hclust) +- Row and column labels with dependent variable help us identify potential outliers and potential under-fitting. + +```{r, out.width = "100%"} +plot(tidy_c5) +``` + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` distance metric for rules is `jaccard` and distance metric for row clusters is `euclidean`. Former can be changed to any distance supported by `proxy` package or a custom distance function for custom insight! + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` When you have a rulelist which is a combination of multiple classifiers, rule clusters quickly reveal 'correlated' rules! The ones which cover almost same rows, but LHS of each reads different! + +## Calculate + +`calculate` computes cumulative metrics (as rules are applied in the row order) depending on attribute `estimation_type`. + +```{r, eval = FALSE} +calculate(tidy_c5) +``` + +```{r, echo = FALSE} +tibble::as_tibble(calculate(tidy_c5)) +``` + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` `calculate` allows a custom metric of your choice! + +## Reorder + +`reorder` intends to reorder the order of rules. At the moment, the greedy implementation adds one rule at a time to a new rulelist (from the input rulelist) such that a metric (see `calculate`) is maximized/minimized.
+ +Suppose, you wanted to find a smaller ruleset with least overlap that would still cover 80% of the validation_data. Then, + +```{r, cache=TRUE} +reorder(tidy_c5, + metric = c("cumulative_overlap", + "cumulative_coverage", + "cumulative_accuracy" + ), + minimize = TRUE + ) %>% + mutate(rel_cum_overlap = + cumulative_overlap / max(cumulative_overlap), + rel_cum_coverage = + cumulative_coverage / max(cumulative_coverage) + ) %>% + select(rule_nbr, LHS, RHS, + rel_cum_overlap, rel_cum_coverage, + cumulative_accuracy + ) +``` + +We infer that the first 9 rules (~ 20% overlap) in the reordered rulelist would do, while still ensuring an accuracy of 85%! + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` In the above code, 2nd metric onwards are used to break ties! (similar to `base::order`) + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` Reordering changes the decision boundaries of your fit! It is a post-hoc method to overlap the *learnt* rules to optimize for the metric you need! But remember, greedy optimization method does not guarantee the global minima (maxima)! + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` `reorder` comes with an `init = k` argument which leaves a predecided top k rules in their order and reorders only bottom ones. This might be required when policy layer needs to be incorporated into the rule engine! + +## Prune + +`prune` suggests k th rule to stop at based on some stopping criteria. + +Suppose, we seek to find a smaller rulelist with maximum possible accuracy with a minimum (relative) coverage of 70% and (relative) overlap not more than half the number of rows.
Then, + +```{r, cache = TRUE} +prune_suggestion = + reorder(tidy_c5, "cumulative_accuracy", minimize = FALSE) %>% + prune(stop_expr_string = "relative__cumulative_coverage >= 0.7 & cumulative_overlap <= 728/2") + +prune_suggestion +plot(prune_suggestion) +prune_suggestion$pruned +``` + +> `r paste0(rep(cli::symbol$smiley, 3), collapse = "")` `prune` is powerful when combined with `reorder`! While `reorder` chases a metric, `prune` takes care of constraints! This might lead to small rulelists, very good for explainability! + +## Out in the Wild + +Use `to_sql_case` to get SQL case when code chunk from a rulelist. + +```{r, eval = FALSE} +to_sql_case(head(tidy_c5, 5)) +``` + +``` +CASE +WHEN (Age > 26) + AND (EnvironmentSatisfaction IN ( 'Medium', 'High', 'Very_High' )) + AND (PercentSalaryHike <= 17) + AND (StockOptionLevel > 0) + AND (StockOptionLevel <= 2) + AND (TotalWorkingYears > 2) THEN + 'No' +WHEN (Age > 26) + AND (EnvironmentSatisfaction IN ( 'Medium', 'High', 'Very_High' )) + AND (StockOptionLevel > 0) + AND (YearsAtCompany > 3) THEN + 'No' +WHEN (BusinessTravel = 'Non-Travel') THEN + 'No' +WHEN (Age <= 31) + AND (EducationField = 'Technical_Degree') + AND (StockOptionLevel <= 0) THEN + 'Yes' +WHEN (JobSatisfaction IN ( 'Low', 'Medium', 'High' )) + AND (MonthlyIncome > 3210) + AND (RelationshipSatisfaction IN ( 'Medium', 'High', 'Very_High' )) + AND (TrainingTimesLastYear > 2) THEN + 'No' +ELSE + NULL +END AS output +``` + +## More + +- We will add `tidy` support to more models. Your contributions are welcome! +- Ideas for methods are welcome! + +`r cli::symbol$tick` For dev and issues, reach us at http://github.com/talegari/tidyrules + +`r cli::symbol$tick` 'master' branch always holds the 'tested' dev code! + +`r cli::symbol$tick` Get the latest stable version from CRAN!
+ +Yours truly, +Amith (ಅಮಿತ್) and Srikanth (ಶ್ರೀಕಾಂತ) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/vignettes/articles/using_tidyrules_cache/html/__packages b/vignettes/articles/using_tidyrules_cache/html/__packages new file mode 100644 index 0000000..b7567b5 --- /dev/null +++ b/vignettes/articles/using_tidyrules_cache/html/__packages @@ -0,0 +1,3 @@ +dplyr +testthat +tidyrules diff --git a/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.RData b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.RData new file mode 100644 index 0000000..833de92 Binary files /dev/null and b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.RData differ diff --git a/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.rdb b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.rdb new file mode 100644 index 0000000..e69de29 diff --git a/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.rdx b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.rdx new file mode 100644 index 0000000..486feb5 Binary files /dev/null and b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-15_f03a8d63362ca102ad981b956d49d4d1.rdx differ diff --git a/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.RData b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.RData new file mode 100644 index 0000000..5fb8eba Binary files /dev/null and b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.RData differ diff --git 
a/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.rdb b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.rdb new file mode 100644 index 0000000..0a5cb33 Binary files /dev/null and b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.rdb differ diff --git a/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.rdx b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.rdx new file mode 100644 index 0000000..af2c584 Binary files /dev/null and b/vignettes/articles/using_tidyrules_cache/html/unnamed-chunk-16_def8c7d6e5eca5b9e427ce9481afc21b.rdx differ diff --git a/vignettes/tidyrules_vignette.Rmd b/vignettes/tidyrules_vignette.Rmd deleted file mode 100644 index 34bb45b..0000000 --- a/vignettes/tidyrules_vignette.Rmd +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: "Using tidyrules" -author: "Srikanth KS, Amith Kumar UR" -date: "`r Sys.Date()`" -output: - html_document: - toc: true - toc_float: - collapsed: false - smooth_scroll: false -vignette: > - %\VignetteIndexEntry{Using tidyrules} - %\VignetteEngine{knitr::rmarkdown} - \usepackage[utf8]{inputenc} -abstract: "tidyrules package provides a framework to work with decision rules. Rules can be extracted from supported models, augmented with (custom) metrics using validation data, manipulated using standard dataframe operations, reordered and pruned based on a metric, predict on unseen (test) data. Utilities include; Creating a rulelist manually, Exporting a rulelist as a SQL case statement and so on. The package offers two classes; rulelist and rulelset based on dataframe." ---- \ No newline at end of file