From 115710804b441255ebb046bb0e3b81d4c26bc0da Mon Sep 17 00:00:00 2001 From: Dario Romagnoli Date: Tue, 11 Oct 2022 17:03:07 +0200 Subject: [PATCH] Minor changes --- DESCRIPTION | 6 +++--- NEWS.md | 3 +++ R/reduce_to_regions.R | 20 +++++++++++++------- man/median_of_region.Rd | 20 -------------------- man/reduce_to_regions.Rd | 10 +++++++++- tests/testthat/test-PAMES.R | 4 ++-- 6 files changed, 30 insertions(+), 33 deletions(-) delete mode 100644 man/median_of_region.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 5e51ee9..0fd9fc0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: PAMES -Date: 2021-03-12 +Date: 2021-12-01 Type: Package Title: Purity Assessment from clonal MEthylation Sites Description: Exploiting data from DNA methylation, this package provides the operations required to evaluate the purity of tumor samples. -Version: 2.7.1 +Version: 2.7.2 Authors@R: c(person("Dario", "Romagnoli", role=c("aut", "cre"), email="dario.romagnoli87@gmail.com"), person("Matteo", "Benelli", role="aut"), @@ -24,5 +24,5 @@ Suggests: License: GPL-3 | file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.0 VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index fb3b287..aaabb1a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,6 @@ +# PAMES v2.7.2 +- rename internal helper `median_of_region` to `summarise_region` +- add `method` parameter to `reduce_to_regions`: allow choice between "median" (default) and "mean" # PAMES v2.7.1 - fix `select_informative_regions_ext` with flag `return_info=TRUE` # PAMES v2.7.0 diff --git a/R/reduce_to_regions.R b/R/reduce_to_regions.R index c0d3fae..d84347b 100644 --- a/R/reduce_to_regions.R +++ b/R/reduce_to_regions.R @@ -11,14 +11,17 @@ #' @param min_CpGs An integer (default to 3). Minimum number of CpG sites #' within a single genomic region required to compute the reduced beta value #' (return NA otherwise). +#' @param method Take the `median` (default) or the `mean` of the CpG sites in each region. 
#' @return A matrix of beta values (nrow == length(cpg_indexes)). #' @importFrom stats median #' @export #' @examples #' reduced_data <- reduce_to_regions(bs_toy_matrix, bs_toy_sites, cpg_islands[1:10,]) -reduce_to_regions <- function(beta_table, cpg_sites, cpg_regions, min_CpGs = 3){ +reduce_to_regions <- function(beta_table, cpg_sites, cpg_regions, min_CpGs = 3, method=c("median", "mean")){ + message(sprintf("[%s] # Reduce to regions #", Sys.time())) # check parameters + method <- match.arg(method) min_CpGs <- as.integer(min_CpGs) assertthat::assert_that(min_CpGs > 0) assertthat::assert_that(ncol(cpg_sites) >= 2) @@ -63,7 +66,7 @@ reduce_to_regions <- function(beta_table, cpg_sites, cpg_regions, min_CpGs = 3){ reduced_data <- lapply(seq_along(idx_list[above_thr_regions]), function(i) { utils::setTxtProgressBar(pb, i) idx <- idx_list[[i]] - median_of_region(beta_table[idx,,drop = FALSE], min_CpGs) + summarise_region(beta_table[idx,,drop = FALSE], min_CpGs, method) }) close(pb) @@ -73,20 +76,23 @@ reduce_to_regions <- function(beta_table, cpg_sites, cpg_regions, min_CpGs = 3){ return(reduced_table) } -#' Transform CpG sites to one CpG region +#' Reduce many CpG sites to one CpG region #' -#' If the number of sites is sufficient take the median value else return NA. +#' If the number of sites is sufficient, take the median/mean value else return NA. #' @param x A subset matrix. #' @param n Minimum required number of sites per region (return NA otherwise). +#' @param method Either `median` or `mean`. 
#' @return A vector #' @keywords internal -median_of_region <- function(x, n) { - # remove sites non reported for all samples +summarise_region <- function(x, n, method) { + # remove fully NA sites valid_sites <- which(rowSums(is.na(x)) != ncol(x)) x <- x[valid_sites,,drop=FALSE] if (nrow(x) < n) { return(rep(NA, ncol(x))) - } else { + } else if (method=="median"){ return(apply(x, 2, median, na.rm = TRUE)) + } else if (method=="mean"){ + return(apply(x, 2, mean, na.rm = TRUE)) } } diff --git a/man/median_of_region.Rd b/man/median_of_region.Rd deleted file mode 100644 index 2825c81..0000000 --- a/man/median_of_region.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reduce_to_regions.R -\name{median_of_region} -\alias{median_of_region} -\title{Transform CpG sites to one CpG region} -\usage{ -median_of_region(x, n) -} -\arguments{ -\item{x}{A subset matrix.} - -\item{n}{Minimum required number of sites per region (return NA otherwise).} -} -\value{ -A vector -} -\description{ -If the number of sites is sufficient take the median value else return NA. -} -\keyword{internal} diff --git a/man/reduce_to_regions.Rd b/man/reduce_to_regions.Rd index 54f2d0d..cdc3fd2 100644 --- a/man/reduce_to_regions.Rd +++ b/man/reduce_to_regions.Rd @@ -4,7 +4,13 @@ \alias{reduce_to_regions} \title{Reduce beta values from CpG sites to genomic regions} \usage{ -reduce_to_regions(beta_table, cpg_sites, cpg_regions, min_CpGs = 3) +reduce_to_regions( + beta_table, + cpg_sites, + cpg_regions, + min_CpGs = 3, + method = c("median", "mean") +) } \arguments{ \item{beta_table}{A matrix of beta-values (percentage).} @@ -16,6 +22,8 @@ reduce_to_regions(beta_table, cpg_sites, cpg_regions, min_CpGs = 3) \item{min_CpGs}{An integer (default to 3). 
Minimum number of CpG sites within a single genomic region required to compute the reduced beta value (return NA otherwise).} + +\item{method}{Take the `median` (default) or the `mean` of the CpG sites in each region.} } \value{ A matrix of beta values (nrow == length(cpg_indexes)). diff --git a/tests/testthat/test-PAMES.R b/tests/testthat/test-PAMES.R index fcd5112..b8af340 100644 --- a/tests/testthat/test-PAMES.R +++ b/tests/testthat/test-PAMES.R @@ -33,8 +33,8 @@ context("CpG regions") ################################################## test_that("median of regions", { x <- rbind(rep(NA, 10), sample(100, 10), sample(100, 10)) x[,3] <- NA - expect_type(median_of_region(x, 2), "double") - expect_true(all(is.na(median_of_region(x, 3)))) + expect_type(summarise_region(x, 2, "median"), "double") + expect_true(all(is.na(summarise_region(x, 3, "mean")))) }) test_that("reduce_to_regions works", { expect_error(reduce_to_regions(tumor_toy_data, illumina27k_hg38[3:4], cpg_islands), "No shared chromosomes")