-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #133 from smuellerd/master
added z-score method
- Loading branch information
Showing
2 changed files
with
183 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
#' z-score | ||
#' | ||
#' @description | ||
#' Calculates regulatory activities using a z-score as descibed in KSEA or RoKAI. | ||
#' | ||
#' @details | ||
#' The z-score calculates the mean of the molecular features of the known targets | ||
#' for each regulator and adjusts it for the number of identified targets for the | ||
#' regulator, the standard deviation of all molecular features (RoKAI), as well as | ||
#' the mean of all moleculare features (KSEA). | ||
#' | ||
#' @inheritParams .decoupler_mat_format | ||
#' @inheritParams .decoupler_network_format | ||
#' @param sparse Deprecated parameter. | ||
#' @param center Logical value indicating if `mat` must be centered by | ||
#' [base::rowMeans()]. | ||
#' @param na.rm Should missing values (including NaN) be omitted from the | ||
#' calculations of [base::rowMeans()]? | ||
#' @param minsize Integer indicating the minimum number of targets per source. | ||
#' @param flavor Whether the calculation should be based on RoKAI (default) or | ||
#' KSEA. | ||
#' | ||
#' @return A long format tibble of the enrichment scores for each source | ||
#' across the samples. Resulting tibble contains the following columns: | ||
#' 1. `statistic`: Indicates which method is associated with which score. | ||
#' 2. `source`: Source nodes of `network`. | ||
#' 3. `condition`: Condition representing each column of `mat`. | ||
#' 4. `score`: Regulatory activity (enrichment score). | ||
#' @family decoupleR statistics | ||
#' @export | ||
#' | ||
#' @importFrom stats coef lm summary.lm | ||
#' @importFrom magrittr %<>% %>% | ||
#' @importFrom dplyr ungroup | ||
#' @examples | ||
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR") | ||
#' | ||
#' mat <- readRDS(file.path(inputs_dir, "mat.rds")) | ||
#' net <- readRDS(file.path(inputs_dir, "net.rds")) | ||
#' | ||
#' run_zscore(mat, net, minsize=0) | ||
run_zscore <- function(mat, | ||
network, | ||
.source = source, | ||
.target = target, | ||
.mor = mor, | ||
.likelihood = likelihood, | ||
sparse = FALSE, | ||
center = FALSE, | ||
na.rm = FALSE, | ||
minsize = 5L, | ||
flavor = "RoKAI" | ||
) { | ||
|
||
# NSE vs. R CMD check workaround | ||
condition <- likelihood <- mor <- p_value <- score <- | ||
source <- statistic <- target <- NULL | ||
|
||
# Check for NAs/Infs in mat | ||
mat %<>% check_nas_infs | ||
|
||
network %>% | ||
# Convert to standard tibble: source-target-mor. | ||
rename_net( | ||
{{ .source }}, | ||
{{ .target }}, | ||
{{ .mor }}, | ||
{{ .likelihood }} | ||
) %>% | ||
filt_minsize(rownames(mat), ., minsize) %>% | ||
# Preprocessing ------------------------------------------------------- | ||
.fit_preprocessing(mat, center, na.rm, sparse) %>% | ||
# Model evaluation ---------------------------------------------------- | ||
{.zscore_analysis(.$mat, .$mor_mat, flavor)} %>% | ||
ungroup() | ||
|
||
} | ||
|
||
#' Wrapper to execute run_zscore() logic on preprocessed data | ||
#' | ||
#' Calculate a z-score from the molecular features of its targets and | ||
#' normalise it by the background molecular features. | ||
#' | ||
#' @inheritParams run_zscore | ||
#' @param mor_mat | ||
#' | ||
#' @inherit run_zscore return | ||
#' @keywords intern | ||
#' @importFrom stats pnorm | ||
#' @importFrom dplyr filter rename mutate group_by summarise arrange | ||
#' @importFrom tibble rownames_to_column tibble | ||
#' @importFrom tidyr pivot_longer drop_na | ||
#' @importFrom purrr map_dfr | ||
#' @importFrom magrittr %<>% %>% | ||
#' @noRd | ||
.zscore_analysis <- function(mat, mor_mat, flavor) { | ||
net <- mor_mat %>% | ||
as.data.frame() %>% | ||
rownames_to_column("target") %>% | ||
pivot_longer(cols = -target, values_to = "mor", names_to = "source") %>% | ||
filter(mor != 0) | ||
|
||
scores <- purrr::map_dfr(seq_len(ncol(mat)), function(exp){ | ||
# Convert column of mat to data frame and drop NA rows | ||
mat_c <- mat[, exp, drop = FALSE] %>% | ||
data.frame() %>% | ||
drop_na() %>% | ||
rownames_to_column("target") | ||
|
||
# Perform inner join and filter NA rows | ||
KSdata <- full_join(net, mat_c, by = "target") %>% | ||
drop_na() %>% | ||
rename("stat" = colnames(.)[4]) | ||
|
||
# Calculate value based on mor and stat | ||
KSdata <- KSdata %>% | ||
mutate(value = mor * stat) | ||
|
||
# Aggregate mean values for each source | ||
Mean.FC <- KSdata %>% | ||
group_by(source) %>% | ||
summarise(mS = mean(value), m = n()) %>% | ||
arrange(source) | ||
|
||
# Calculate Enrichment and z-score | ||
mean_value <- mean(mat_c[, 2], na.rm = TRUE) | ||
if (flavor == "RoKAI") { | ||
mean_value <- 0 | ||
} | ||
Mean.FC <- Mean.FC %>% | ||
mutate( | ||
Enrichment = mS / abs(mean(mat_c[,2], na.rm = TRUE)), | ||
z.score = ((mS - mean_value) * sqrt(m)) / sd(mat_c[, 2], na.rm = TRUE), | ||
p.value = pnorm(-abs(z.score)) | ||
) | ||
|
||
# Prepare and return result as tibble | ||
tibble( | ||
statistic = "z_score", | ||
source = Mean.FC$source, | ||
condition = colnames(mat_c)[2], | ||
score = Mean.FC$z.score, | ||
p_value = Mean.FC$p.value | ||
) | ||
}) | ||
# Arrange scores according to the order of sources in mor_mat | ||
scores <- scores %>% | ||
arrange(match(source, colnames(mor_mat))) | ||
|
||
return(scores) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
library(decoupleR) | ||
|
||
# Directories ------------------------------------------------------------- | ||
|
||
# Inputs | ||
input_dir <- system.file("testdata", "inputs", package = "decoupleR") | ||
|
||
|
||
# Data to run ------------------------------------------------------------- | ||
|
||
mat <- file.path(input_dir, "mat.rds") %>% | ||
readRDS() | ||
|
||
net <- file.path(input_dir, "net.rds") %>% | ||
readRDS() | ||
|
||
# Test for run_wmean function --------------------------------------------- | ||
|
||
test_that("test run_zscore", { | ||
res_1 <- run_zscore(mat, net, minsize=0) | ||
|
||
expect_equal(res_1$score[1], 3.52, tolerance=0.01) | ||
expect_equal(sign(range(res_1$score)), c(-1, 1)) | ||
|
||
net_2 <- net | ||
net_2$mor <- 1 | ||
res_2 <- run_zscore(mat, net_2, minsize=0) | ||
|
||
expect_equal(res_2$score[1], 3.89, tolerance=0.01) | ||
expect_equal(sign(range(res_2$score)), c(1, 1)) | ||
}) |