Skip to content

Commit

Permalink
Merge pull request #133 from smuellerd/master
Browse files Browse the repository at this point in the history
added z-score method
  • Loading branch information
PauBadiaM authored Oct 19, 2024
2 parents f916a46 + 6e9553a commit 3a41229
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 0 deletions.
152 changes: 152 additions & 0 deletions R/statistic-zscore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#' z-score
#'
#' @description
#' Calculates regulatory activities using a z-score as descibed in KSEA or RoKAI.
#'
#' @details
#' The z-score calculates the mean of the molecular features of the known targets
#' for each regulator and adjusts it for the number of identified targets for the
#' regulator, the standard deviation of all molecular features (RoKAI), as well as
#' the mean of all moleculare features (KSEA).
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param sparse Deprecated parameter.
#' @param center Logical value indicating if `mat` must be centered by
#' [base::rowMeans()].
#' @param na.rm Should missing values (including NaN) be omitted from the
#' calculations of [base::rowMeans()]?
#' @param minsize Integer indicating the minimum number of targets per source.
#' @param flavor Whether the calculation should be based on RoKAI (default) or
#' KSEA.
#'
#' @return A long format tibble of the enrichment scores for each source
#' across the samples. Resulting tibble contains the following columns:
#' 1. `statistic`: Indicates which method is associated with which score.
#' 2. `source`: Source nodes of `network`.
#' 3. `condition`: Condition representing each column of `mat`.
#' 4. `score`: Regulatory activity (enrichment score).
#' @family decoupleR statistics
#' @export
#'
#' @importFrom stats coef lm summary.lm
#' @importFrom magrittr %<>% %>%
#' @importFrom dplyr ungroup
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#'
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#'
#' run_zscore(mat, net, minsize=0)
run_zscore <- function(mat,
network,
.source = source,
.target = target,
.mor = mor,
.likelihood = likelihood,
sparse = FALSE,
center = FALSE,
na.rm = FALSE,
minsize = 5L,
flavor = "RoKAI"
) {

# NSE vs. R CMD check workaround
condition <- likelihood <- mor <- p_value <- score <-
source <- statistic <- target <- NULL

# Check for NAs/Infs in mat
mat %<>% check_nas_infs

network %>%
# Convert to standard tibble: source-target-mor.
rename_net(
{{ .source }},
{{ .target }},
{{ .mor }},
{{ .likelihood }}
) %>%
filt_minsize(rownames(mat), ., minsize) %>%
# Preprocessing -------------------------------------------------------
.fit_preprocessing(mat, center, na.rm, sparse) %>%
# Model evaluation ----------------------------------------------------
{.zscore_analysis(.$mat, .$mor_mat, flavor)} %>%
ungroup()

}

#' Wrapper to execute run_zscore() logic on preprocessed data
#'
#' Calculate a z-score from the molecular features of its targets and
#' normalise it by the background molecular features.
#'
#' @inheritParams run_zscore
#' @param mor_mat
#'
#' @inherit run_zscore return
#' @keywords intern
#' @importFrom stats pnorm
#' @importFrom dplyr filter rename mutate group_by summarise arrange
#' @importFrom tibble rownames_to_column tibble
#' @importFrom tidyr pivot_longer drop_na
#' @importFrom purrr map_dfr
#' @importFrom magrittr %<>% %>%
#' @noRd
.zscore_analysis <- function(mat, mor_mat, flavor) {
net <- mor_mat %>%
as.data.frame() %>%
rownames_to_column("target") %>%
pivot_longer(cols = -target, values_to = "mor", names_to = "source") %>%
filter(mor != 0)

scores <- purrr::map_dfr(seq_len(ncol(mat)), function(exp){
# Convert column of mat to data frame and drop NA rows
mat_c <- mat[, exp, drop = FALSE] %>%
data.frame() %>%
drop_na() %>%
rownames_to_column("target")

# Perform inner join and filter NA rows
KSdata <- full_join(net, mat_c, by = "target") %>%
drop_na() %>%
rename("stat" = colnames(.)[4])

# Calculate value based on mor and stat
KSdata <- KSdata %>%
mutate(value = mor * stat)

# Aggregate mean values for each source
Mean.FC <- KSdata %>%
group_by(source) %>%
summarise(mS = mean(value), m = n()) %>%
arrange(source)

# Calculate Enrichment and z-score
mean_value <- mean(mat_c[, 2], na.rm = TRUE)
if (flavor == "RoKAI") {
mean_value <- 0
}
Mean.FC <- Mean.FC %>%
mutate(
Enrichment = mS / abs(mean(mat_c[,2], na.rm = TRUE)),
z.score = ((mS - mean_value) * sqrt(m)) / sd(mat_c[, 2], na.rm = TRUE),
p.value = pnorm(-abs(z.score))
)

# Prepare and return result as tibble
tibble(
statistic = "z_score",
source = Mean.FC$source,
condition = colnames(mat_c)[2],
score = Mean.FC$z.score,
p_value = Mean.FC$p.value
)
})
# Arrange scores according to the order of sources in mor_mat
scores <- scores %>%
arrange(match(source, colnames(mor_mat)))

return(scores)
}

31 changes: 31 additions & 0 deletions tests/testthat/test-statistic-zscore.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
library(decoupleR)

# Directories -------------------------------------------------------------

# Inputs
input_dir <- system.file("testdata", "inputs", package = "decoupleR")


# Data to run -------------------------------------------------------------

mat <- file.path(input_dir, "mat.rds") %>%
readRDS()

net <- file.path(input_dir, "net.rds") %>%
readRDS()

# Test for run_wmean function ---------------------------------------------

test_that("test run_zscore", {
res_1 <- run_zscore(mat, net, minsize=0)

expect_equal(res_1$score[1], 3.52, tolerance=0.01)
expect_equal(sign(range(res_1$score)), c(-1, 1))

net_2 <- net
net_2$mor <- 1
res_2 <- run_zscore(mat, net_2, minsize=0)

expect_equal(res_2$score[1], 3.89, tolerance=0.01)
expect_equal(sign(range(res_2$score)), c(1, 1))
})

0 comments on commit 3a41229

Please sign in to comment.