Skip to content

Commit

Permalink
New version v3.0.0 aka PAMES2
Browse files Browse the repository at this point in the history
  • Loading branch information
romagnolid committed Oct 31, 2022
1 parent 1157108 commit 0525ee3
Show file tree
Hide file tree
Showing 57 changed files with 1,436 additions and 2,015 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
^README-.*\.png$
^data-raw/$
^TODO\.md$
^LICENSE\.md$
19 changes: 8 additions & 11 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,27 +1,24 @@
Package: PAMES
Date: 2021-12-01
Type: Package
Title: Purity Assessment from clonal MEthylation Sites
Description: Exploiting data from DNA methylation, this package provides the operations
required to evaluate the purity of tumor samples.
Version: 2.7.2
Description: Exploiting data from DNA methylation, this package provides the
operations required to evaluate the purity of tumor samples.
Version: 3.0.0
Authors@R: c(person("Dario", "Romagnoli", role=c("aut", "cre"),
email="[email protected]"),
person("Matteo", "Benelli", role="aut"),
person("Francesca", "Demichelis", role="aut"))
Depends: R (>= 3.5.0)
Depends: R (>= 4.0.0)
Imports:
parallel,
S4Vectors,
assertthat,
GenomicRanges,
IRanges,
assertthat,
dplyr
parallel,
S4Vectors,
Suggests:
knitr,
rmarkdown,
testthat
License: GPL-3 | file LICENSE
License: GPL (>= 3)
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.0
Expand Down
674 changes: 0 additions & 674 deletions LICENSE

This file was deleted.

595 changes: 595 additions & 0 deletions LICENSE.md

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Generated by roxygen2: do not edit by hand

export(compute_AUC)
export(compute_purity)
export(find_informative_regions)
export(find_informative_sites)
export(get_AUC)
export(get_purity)
export(reduce_to_regions)
export(select_informative_regions)
export(select_informative_regions_ext)
export(select_informative_sites)
export(select_informative_sites_ext)
importFrom(dplyr,"%>%")
importFrom(stats,dist)
importFrom(stats,median)
importFrom(stats,quantile)
importFrom(stats,setNames)
importFrom(utils,adist)
importFrom(utils,head)
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# PAMES v3.0.0 aka PAMES2
PAMES has been completely rewritten to work in with our new framework [MiMeSis](https://github.com/cgplab/MIMESIS)
Major changes:
- all functions now return a data.frame instead of a vector: according to our experience, data.frames works integrate better with pipelines;
- `get_AUC` replace `compute_AUC` and takes as input a matrix of beta-values as originally produced by Illumina Beadchips (instead of rounded percentages);
- `find_informative_sites` and `find_informative_regions` replace `select_informative_sites` and `select_informative_regions`, respectively, and integrate the new `control_constraints` parameter;
- `get_purity` replace `compute_purity` and as input a data.frame (generated by `find_informative_sites`/`find_informative_regions`).

# PAMES v2.7.2
- rename `median_of_regions`: `reduce_region`
- add `method` parameter to `reduce_to_regions`: allow choice between "median" (default) or mean
Expand Down
12 changes: 6 additions & 6 deletions R/PAMES.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#' PAMES: Purity Assessment from clonal MEthylation Sites
#'
#' The PAMES package provides a set of functions to estimate
#' the level of purity of tumor samples.
#' the level of purity or the tumor content of tumor samples.
#'
#' The basic workflow of PAMES requires to \code{\link{compute_AUC}} (to evaluate tumor-control methylation differences),
#' \code{\link{select_informative_sites}} (to retrieve sites of interest),
#' and \code{\link{compute_purity}} of tumor samples.
#' The basic workflow of PAMES requires to \code{\link{get_AUC}} (to evaluate tumor-control methylation differences),
#' \code{\link{find_informative_sites}} (to retrieve sites of interest),
#' and \code{\link{get_purity}} of tumor samples.
#' When working with methylation data obtained with other technologies (such as Bisulphite Sequencing),
#' users should must map their set of CpG sites to differentially methylated regions
#' (such as CpG islands) using data \code{\link{reduce_to_regions}}, then
#' \code{\link{compute_AUC}}, \code{\link{select_informative_regions}} and finally
#' \code{\link{compute_purity}}.
#' \code{\link{get_AUC}}, \code{\link{find_informative_regions}} and finally
#' \code{\link{get_purity}}.
#
#' @docType package
#' @name PAMES
Expand Down
93 changes: 0 additions & 93 deletions R/compute_AUC.R

This file was deleted.

33 changes: 0 additions & 33 deletions R/compute_purity.R

This file was deleted.

8 changes: 4 additions & 4 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@
#' @source \url{https://genome.ucsc.edu/}
"cpg_islands"

#' BS Toy data
#' BS-Seq Toy data
#'
#' Example of tumor and control beta values from Bisulphite Sequencing
"bs_toy_matrix"
"bs_seq_toy_matrix"

#' BS Toy sites
#' BS-Seq Toy sites
#'
#' Example of data.frame with location of CpG sites
"bs_toy_sites"
"bs_seq_toy_sites"
136 changes: 136 additions & 0 deletions R/find_informative_regions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#' Select informative regions (extended)
#'
#' This function generates a list of informative regions to estimate the purity
#' or the tumor content of a set of tumor samples.
#'
#' A new parameter, named \code{control_costraints}, is required force
#' selection of sites where upper/lower quartiles of control scores are below
#' beta-values given by \code{control_costraints}. Regions are divided into
#' \code{hyper} and \code{hypo} depending on their level of methylation with
#' respect to the average beta-score of normal samples.
#'
#' @param tumor_table A matrix of beta-values of tumor samples.
#' @param control_table A matrix of beta-values of control/normal samples.
#' @param auc A data.frame with AUC scores generated by \code{get_AUC}.
#' @param cores Number of parallel processes.
#' @param max_regions Maximum number of regions to retrieve (half hyper-, half
#' hypo-methylated) (default=20).
#' @param percentiles A vector of length 2. Min and max percentiles to
#' select sites with beta values outside hypo- and hyper-ranges (default = c(0,100);
#' i.e. only min and max beta should be outside of ranges).
#' @param hyper_range A vector of length 2 with minimum lower and upper values
#' required to select hyper-methylated informative sites.
#' @param hypo_range A vector of length 2 with minimum lower and upper values
#' required to select hypo-methylated informative sites.
#' @param method How to select sites: "even" (half hyper-, half hypo-methylated sites),
#' "top" (highest AUC irregardless of hyper or hypomethylation), "hyper" (hyper-methylated sites only),
#' "hypo" (hypo-methylated, sites only).
#' @param control_costraints To select a site, "first quartile"/"third quartile" of control data must be above/below these beta-values.
#' @param full_info Return all informative sites (for debugging purposes).
#' @return A data.frame reporing region names (chr_position) and type ("hyper" and "hypo") of informative regions.
#' @importFrom stats quantile
#' @export
#' @examples
#' reduced_data <- reduce_to_regions(bs_seq_toy_matrix, bs_seq_toy_sites, cpg_islands[1:1000,])
#' auc_data <- get_AUC(reduced_data[,1:10], reduced_data[,11:20])
#' info_regions <- find_informative_regions(reduced_data[,1:10], reduced_data[,11:20], auc_data)
find_informative_regions <- function(tumor_table, control_table, auc, cores=1,
max_regions = 20, percentiles = c(0,100),
hyper_range = c(min = .40, max = .90), hypo_range = c(min = .10, max = .60),
control_costraints = c(.30, .70),
method = c("even", "top", "hyper", "hypo"), full_info=FALSE){

message(sprintf("[%s] # Find informative regions #", Sys.time()))

# check parameters
assertthat::assert_that(is.matrix(tumor_table))
assertthat::assert_that(is.matrix(control_table))
assertthat::assert_that(!is.null(rownames(tumor_table)))
assertthat::assert_that(identical(rownames(tumor_table), rownames(control_table)))
assertthat::assert_that(nrow(tumor_table) == nrow(auc))

assertthat::assert_that(is.matrix(tumor_table))
assertthat::assert_that(is.matrix(control_table))
assertthat::assert_that(nrow(tumor_table) == nrow(auc))

assertthat::assert_that(is.numeric(cores))
assertthat::assert_that(is.numeric(max_regions))
assertthat::assert_that(is.numeric(hyper_range))
assertthat::assert_that(is.numeric(hypo_range))
assertthat::assert_that(is.numeric(control_costraints))
assertthat::assert_that(is.numeric(percentiles))

assertthat::assert_that(length(hyper_range) == 2)
assertthat::assert_that(length(hypo_range) == 2)
assertthat::assert_that(length(control_costraints) == 2)
assertthat::assert_that(length(percentiles) == 2)

method <- match.arg(method)
if (method == "even")
assertthat::assert_that(max_regions %% 2 == 0, msg="method is set to 'even' but max_regions is not even")

assertthat::assert_that(is.logical(full_info))

message(sprintf("- Method: %s", method))
message(sprintf("- Number of regions to retrieve: %i", max_regions))
message(sprintf("- Hyper-methylated regions range: %.2f-%.2f", hyper_range[1], hyper_range[2]))
message(sprintf("- Hypo-methylated regions range: %.2f-%.2f", hypo_range[1], hypo_range[2]))
message(sprintf("- Control constraints: %.2f-%.2f", control_costraints[1], control_costraints[2]))
message(sprintf("- Percentiles: %g-%g", percentiles[1], percentiles[2]))

# minimum and maximum beta per region
cl <- parallel::makeCluster(cores)
message(sprintf("[%s] Compute min-/max- beta scores...", Sys.time()))
min_beta <- suppressWarnings(parallel::parApply(cl, tumor_table, 1, quantile, probs = percentiles[1]/100, na.rm = TRUE))
max_beta <- suppressWarnings(parallel::parApply(cl, tumor_table, 1, quantile, probs = percentiles[2]/100, na.rm = TRUE))

message(sprintf("[%s] Compute control interquartiles...", Sys.time()))
lower_quart <- suppressWarnings(parallel::parApply(cl, control_table, 1, quantile, probs = .25, na.rm = TRUE))
upper_quart <- suppressWarnings(parallel::parApply(cl, control_table, 1, quantile, probs = .75, na.rm = TRUE))

message(sprintf("[%s] Select regions...", Sys.time()))
diff_meth_regions_df <- cbind(auc,
data.frame(Max_beta = max_beta,
Min_beta = min_beta,
Lower_Quart = lower_quart,
Upper_Quart = upper_quart))
hyper_regions_idx <- with(diff_meth_regions_df, which(AUC > .80 & Min_beta < hyper_range[1] & Max_beta > hyper_range[2] & Upper_Quart < control_costraints[1]))
hypo_regions_idx <- with(diff_meth_regions_df, which(AUC < .20 & Min_beta < hypo_range[1] & Max_beta > hypo_range[2] & Lower_Quart > control_costraints[2]))
regions_type <- rep("no_differences", nrow(auc))
regions_type[hyper_regions_idx] <- "hyper"
regions_type[hypo_regions_idx] <- "hypo"
diff_meth_regions_df$Region_type <- regions_type
diff_meth_regions_df$AUC[diff_meth_regions_df$eegions_type == "Hypo"] <- 1-diff_meth_regions_df$AUC
diff_meth_regions_df <- diff_meth_regions_df[order(diff_meth_regions_df$AUC, decreasing = TRUE),]

hyper_regions_df <- diff_meth_regions_df[diff_meth_regions_df$Region_type == "hyper",]
hypo_regions_df <- diff_meth_regions_df[diff_meth_regions_df$Region_type == "hypo",]
message(sprintf("* Total hyper-methylated regions = %i", nrow(hyper_regions_df)))
message(sprintf("* Total hypo-methylated regions = %i", nrow(hypo_regions_df)))

if (full_info) {
regions <- rbind(hyper_regions_df, hypo_regions_df)
columns <- c("Probe", "Region_type",
"AUC", "Max_beta", "Min_beta", "Upper_Quart", "Lower_Quart")
regions <- regions[,columns]
} else {
if (method == "even") {
regions <- rbind(head(hyper_regions_df[hyper_regions_df$Keep_site,],max_regions/2),
head(hypo_regions_df[hypo_regions_df$Keep_site,],max_regions/2))
} else if (method == "top") {
regions <- rbind(hyper_regions_df, hypo_regions_df)
regions <- regions[order(regions$AUC, decreasing = TRUE),]
regions <- head(regions,max_regions)
} else if (method == "hyper") {
regions <- head(hyper_regions_df,max_regions)
} else if (method == "hypo") {
regions <- head(hypo_regions_df,max_regions)
}
regions <- regions[c("Probe", "Region_type")]
message(sprintf("* Retrieved hyper-methylated regions = %i", sum(regions$Region_type=="hyper")))
message(sprintf("* Retrieved hypo-methylated regions = %i", sum(regions$Region_type=="hypo")))
}
rownames(regions) <- NULL
message(sprintf("[%s] Done", Sys.time()))
return(regions)
}
Loading

0 comments on commit 0525ee3

Please sign in to comment.