-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1157108
commit 0525ee3
Showing
57 changed files
with
1,436 additions
and
2,015 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ | |
^README-.*\.png$ | ||
^data-raw/$ | ||
^TODO\.md$ | ||
^LICENSE\.md$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,24 @@ | ||
Package: PAMES | ||
Date: 2021-12-01 | ||
Type: Package | ||
Title: Purity Assessment from clonal MEthylation Sites | ||
Description: Exploiting data from DNA methylation, this package provides the operations | ||
required to evaluate the purity of tumor samples. | ||
Version: 2.7.2 | ||
Description: Exploiting data from DNA methylation, this package provides the | ||
operations required to evaluate the purity of tumor samples. | ||
Version: 3.0.0 | ||
Authors@R: c(person("Dario", "Romagnoli", role=c("aut", "cre"), | ||
email="[email protected]"), | ||
person("Matteo", "Benelli", role="aut"), | ||
person("Francesca", "Demichelis", role="aut")) | ||
Depends: R (>= 3.5.0) | ||
Depends: R (>= 4.0.0) | ||
Imports: | ||
parallel, | ||
S4Vectors, | ||
assertthat, | ||
GenomicRanges, | ||
IRanges, | ||
assertthat, | ||
dplyr | ||
parallel, | ||
S4Vectors, | ||
Suggests: | ||
knitr, | ||
rmarkdown, | ||
testthat | ||
License: GPL-3 | file LICENSE | ||
License: GPL (>= 3) | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 7.2.0 | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,13 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(compute_AUC) | ||
export(compute_purity) | ||
export(find_informative_regions) | ||
export(find_informative_sites) | ||
export(get_AUC) | ||
export(get_purity) | ||
export(reduce_to_regions) | ||
export(select_informative_regions) | ||
export(select_informative_regions_ext) | ||
export(select_informative_sites) | ||
export(select_informative_sites_ext) | ||
importFrom(dplyr,"%>%") | ||
importFrom(stats,dist) | ||
importFrom(stats,median) | ||
importFrom(stats,quantile) | ||
importFrom(stats,setNames) | ||
importFrom(utils,adist) | ||
importFrom(utils,head) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
#' Select informative regions (extended) | ||
#' | ||
#' This function generates a list of informative regions to estimate the purity | ||
#' or the tumor content of a set of tumor samples. | ||
#' | ||
#' A new parameter, named \code{control_costraints}, is required force | ||
#' selection of sites where upper/lower quartiles of control scores are below | ||
#' beta-values given by \code{control_costraints}. Regions are divided into | ||
#' \code{hyper} and \code{hypo} depending on their level of methylation with | ||
#' respect to the average beta-score of normal samples. | ||
#' | ||
#' @param tumor_table A matrix of beta-values of tumor samples. | ||
#' @param control_table A matrix of beta-values of control/normal samples. | ||
#' @param auc A data.frame with AUC scores generated by \code{get_AUC}. | ||
#' @param cores Number of parallel processes. | ||
#' @param max_regions Maximum number of regions to retrieve (half hyper-, half | ||
#' hypo-methylated) (default=20). | ||
#' @param percentiles A vector of length 2. Min and max percentiles to | ||
#' select sites with beta values outside hypo- and hyper-ranges (default = c(0,100); | ||
#' i.e. only min and max beta should be outside of ranges). | ||
#' @param hyper_range A vector of length 2 with minimum lower and upper values | ||
#' required to select hyper-methylated informative sites. | ||
#' @param hypo_range A vector of length 2 with minimum lower and upper values | ||
#' required to select hypo-methylated informative sites. | ||
#' @param method How to select sites: "even" (half hyper-, half hypo-methylated sites), | ||
#' "top" (highest AUC irregardless of hyper or hypomethylation), "hyper" (hyper-methylated sites only), | ||
#' "hypo" (hypo-methylated, sites only). | ||
#' @param control_costraints To select a site, "first quartile"/"third quartile" of control data must be above/below these beta-values. | ||
#' @param full_info Return all informative sites (for debugging purposes). | ||
#' @return A data.frame reporing region names (chr_position) and type ("hyper" and "hypo") of informative regions. | ||
#' @importFrom stats quantile | ||
#' @export | ||
#' @examples | ||
#' reduced_data <- reduce_to_regions(bs_seq_toy_matrix, bs_seq_toy_sites, cpg_islands[1:1000,]) | ||
#' auc_data <- get_AUC(reduced_data[,1:10], reduced_data[,11:20]) | ||
#' info_regions <- find_informative_regions(reduced_data[,1:10], reduced_data[,11:20], auc_data) | ||
find_informative_regions <- function(tumor_table, control_table, auc, cores=1, | ||
max_regions = 20, percentiles = c(0,100), | ||
hyper_range = c(min = .40, max = .90), hypo_range = c(min = .10, max = .60), | ||
control_costraints = c(.30, .70), | ||
method = c("even", "top", "hyper", "hypo"), full_info=FALSE){ | ||
|
||
message(sprintf("[%s] # Find informative regions #", Sys.time())) | ||
|
||
# check parameters | ||
assertthat::assert_that(is.matrix(tumor_table)) | ||
assertthat::assert_that(is.matrix(control_table)) | ||
assertthat::assert_that(!is.null(rownames(tumor_table))) | ||
assertthat::assert_that(identical(rownames(tumor_table), rownames(control_table))) | ||
assertthat::assert_that(nrow(tumor_table) == nrow(auc)) | ||
|
||
assertthat::assert_that(is.matrix(tumor_table)) | ||
assertthat::assert_that(is.matrix(control_table)) | ||
assertthat::assert_that(nrow(tumor_table) == nrow(auc)) | ||
|
||
assertthat::assert_that(is.numeric(cores)) | ||
assertthat::assert_that(is.numeric(max_regions)) | ||
assertthat::assert_that(is.numeric(hyper_range)) | ||
assertthat::assert_that(is.numeric(hypo_range)) | ||
assertthat::assert_that(is.numeric(control_costraints)) | ||
assertthat::assert_that(is.numeric(percentiles)) | ||
|
||
assertthat::assert_that(length(hyper_range) == 2) | ||
assertthat::assert_that(length(hypo_range) == 2) | ||
assertthat::assert_that(length(control_costraints) == 2) | ||
assertthat::assert_that(length(percentiles) == 2) | ||
|
||
method <- match.arg(method) | ||
if (method == "even") | ||
assertthat::assert_that(max_regions %% 2 == 0, msg="method is set to 'even' but max_regions is not even") | ||
|
||
assertthat::assert_that(is.logical(full_info)) | ||
|
||
message(sprintf("- Method: %s", method)) | ||
message(sprintf("- Number of regions to retrieve: %i", max_regions)) | ||
message(sprintf("- Hyper-methylated regions range: %.2f-%.2f", hyper_range[1], hyper_range[2])) | ||
message(sprintf("- Hypo-methylated regions range: %.2f-%.2f", hypo_range[1], hypo_range[2])) | ||
message(sprintf("- Control constraints: %.2f-%.2f", control_costraints[1], control_costraints[2])) | ||
message(sprintf("- Percentiles: %g-%g", percentiles[1], percentiles[2])) | ||
|
||
# minimum and maximum beta per region | ||
cl <- parallel::makeCluster(cores) | ||
message(sprintf("[%s] Compute min-/max- beta scores...", Sys.time())) | ||
min_beta <- suppressWarnings(parallel::parApply(cl, tumor_table, 1, quantile, probs = percentiles[1]/100, na.rm = TRUE)) | ||
max_beta <- suppressWarnings(parallel::parApply(cl, tumor_table, 1, quantile, probs = percentiles[2]/100, na.rm = TRUE)) | ||
|
||
message(sprintf("[%s] Compute control interquartiles...", Sys.time())) | ||
lower_quart <- suppressWarnings(parallel::parApply(cl, control_table, 1, quantile, probs = .25, na.rm = TRUE)) | ||
upper_quart <- suppressWarnings(parallel::parApply(cl, control_table, 1, quantile, probs = .75, na.rm = TRUE)) | ||
|
||
message(sprintf("[%s] Select regions...", Sys.time())) | ||
diff_meth_regions_df <- cbind(auc, | ||
data.frame(Max_beta = max_beta, | ||
Min_beta = min_beta, | ||
Lower_Quart = lower_quart, | ||
Upper_Quart = upper_quart)) | ||
hyper_regions_idx <- with(diff_meth_regions_df, which(AUC > .80 & Min_beta < hyper_range[1] & Max_beta > hyper_range[2] & Upper_Quart < control_costraints[1])) | ||
hypo_regions_idx <- with(diff_meth_regions_df, which(AUC < .20 & Min_beta < hypo_range[1] & Max_beta > hypo_range[2] & Lower_Quart > control_costraints[2])) | ||
regions_type <- rep("no_differences", nrow(auc)) | ||
regions_type[hyper_regions_idx] <- "hyper" | ||
regions_type[hypo_regions_idx] <- "hypo" | ||
diff_meth_regions_df$Region_type <- regions_type | ||
diff_meth_regions_df$AUC[diff_meth_regions_df$eegions_type == "Hypo"] <- 1-diff_meth_regions_df$AUC | ||
diff_meth_regions_df <- diff_meth_regions_df[order(diff_meth_regions_df$AUC, decreasing = TRUE),] | ||
|
||
hyper_regions_df <- diff_meth_regions_df[diff_meth_regions_df$Region_type == "hyper",] | ||
hypo_regions_df <- diff_meth_regions_df[diff_meth_regions_df$Region_type == "hypo",] | ||
message(sprintf("* Total hyper-methylated regions = %i", nrow(hyper_regions_df))) | ||
message(sprintf("* Total hypo-methylated regions = %i", nrow(hypo_regions_df))) | ||
|
||
if (full_info) { | ||
regions <- rbind(hyper_regions_df, hypo_regions_df) | ||
columns <- c("Probe", "Region_type", | ||
"AUC", "Max_beta", "Min_beta", "Upper_Quart", "Lower_Quart") | ||
regions <- regions[,columns] | ||
} else { | ||
if (method == "even") { | ||
regions <- rbind(head(hyper_regions_df[hyper_regions_df$Keep_site,],max_regions/2), | ||
head(hypo_regions_df[hypo_regions_df$Keep_site,],max_regions/2)) | ||
} else if (method == "top") { | ||
regions <- rbind(hyper_regions_df, hypo_regions_df) | ||
regions <- regions[order(regions$AUC, decreasing = TRUE),] | ||
regions <- head(regions,max_regions) | ||
} else if (method == "hyper") { | ||
regions <- head(hyper_regions_df,max_regions) | ||
} else if (method == "hypo") { | ||
regions <- head(hypo_regions_df,max_regions) | ||
} | ||
regions <- regions[c("Probe", "Region_type")] | ||
message(sprintf("* Retrieved hyper-methylated regions = %i", sum(regions$Region_type=="hyper"))) | ||
message(sprintf("* Retrieved hypo-methylated regions = %i", sum(regions$Region_type=="hypo"))) | ||
} | ||
rownames(regions) <- NULL | ||
message(sprintf("[%s] Done", Sys.time())) | ||
return(regions) | ||
} |
Oops, something went wrong.