From fc62e5572f070344ff52ab2b4ef30fd328635dd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=BCller-Dott?= Date: Thu, 11 Apr 2024 13:25:01 +0200 Subject: [PATCH 1/2] added option to load meta data --- R/utils-omnipath.R | 65 ++++++++++++++++++++++++---------- tests/testthat/test-omnipath.R | 2 ++ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/R/utils-omnipath.R b/R/utils-omnipath.R index e76ab2d..0ec411a 100644 --- a/R/utils-omnipath.R +++ b/R/utils-omnipath.R @@ -85,12 +85,14 @@ get_dorothea <- function(organism='human', levels=c('A', 'B', 'C'), #' @param organism Which organism to use. Only human, mouse and rat are available. #' @param split_complexes Whether to split complexes into subunits. By default #' complexes are kept as they are. -#' @param ... Ignored. +#' @param load_meta Whether to load metadata for the TF-gene interactions. This is set +#' to `FALSE` by default. +#' @param ... Optional additional arguments, passed to OmniPath import_transcriptional_interactions. #' #' @export #' @examples #' collectri <- get_collectri(organism='human', split_complexes=FALSE) -get_collectri <- function(organism='human', split_complexes=FALSE, ...){ +get_collectri <- function(organism='human', split_complexes=FALSE, load_meta=FALSE, ...){ # NSE vs. R CMD check workaround source_genesymbol <- target_genesymbol <- weight <- NULL @@ -104,6 +106,7 @@ get_collectri <- function(organism='human', split_complexes=FALSE, ...){ organism = organism, genesymbol=TRUE, loops=TRUE, + extra_attrs = TRUE, ... ), error = function(e){ @@ -122,9 +125,12 @@ get_collectri <- function(organism='human', split_complexes=FALSE, ...){ OmnipathR::import_tf_mirna_interactions( genesymbols=TRUE, resources = "CollecTRI", - strict_evidences = TRUE + strict_evidences = TRUE, + extra_attrs = TRUE ) %>% - base::rbind(collectri, .) 
%>% + OmnipathR::extra_attrs_to_cols(sign_decision = CollecTRI_sign_decision, + TF_category = CollecTRI_tf_category) }, error = function(e){ OmnipathR::omnipath_msg( @@ -140,6 +146,10 @@ get_collectri <- function(organism='human', split_complexes=FALSE, ...){ cols <- c('source_genesymbol', 'target_genesymbol', 'is_stimulation', 'is_inhibition') + + if (load_meta){ + cols <- base::append(cols, c('sources', 'references', 'sign_decision', 'TF_category')) + } collectri_interactions <- collectri[!stringr::str_detect(collectri$source, "COMPLEX"), cols] @@ -155,20 +165,39 @@ get_collectri <- function(organism='human', split_complexes=FALSE, ...){ stringr::str_detect(source_genesymbol, "NFKB") ~ "NFKB") ) } - - collectri <- base::rbind(collectri_interactions, collectri_complex) %>% - dplyr::distinct(source_genesymbol, target_genesymbol, - .keep_all = TRUE) %>% - dplyr::mutate(weight = dplyr::case_when( - is_stimulation == 1 ~ 1, - is_stimulation == 0 ~ -1 - )) %>% - dplyr::select(source_genesymbol, target_genesymbol, - weight) %>% - dplyr::rename("source" = source_genesymbol, - "target" = target_genesymbol, - "mor" = weight, - ) + + if (!load_meta){ + collectri <- base::rbind(collectri_interactions, collectri_complex) %>% + dplyr::distinct(source_genesymbol, target_genesymbol, + .keep_all = TRUE) %>% + dplyr::mutate(weight = dplyr::case_when( + is_stimulation == 1 ~ 1, + is_stimulation == 0 ~ -1 + )) %>% + dplyr::select(source_genesymbol, target_genesymbol, + weight) %>% + dplyr::rename("source" = source_genesymbol, + "target" = target_genesymbol, + "mor" = weight) + } else { + collectri <- base::rbind(collectri_interactions, collectri_complex) %>% + dplyr::distinct(source_genesymbol, target_genesymbol, + .keep_all = TRUE) %>% + dplyr::mutate(weight = dplyr::case_when( + is_stimulation == 1 ~ 1, + is_stimulation == 0 ~ -1 + )) %>% + dplyr::select(source_genesymbol, target_genesymbol, + weight, sources, references, sign_decision, TF_category) %>% + dplyr::mutate(references 
= stringr::str_extract_all(references, "\\d+")) %>% + dplyr::mutate(references = purrr::map_chr(references, ~paste(.x, collapse = ";"))) %>% + dplyr::rename("source" = source_genesymbol, + "target" = target_genesymbol, + "mor" = weight, + "resources" = sources, + "PMIDs" = references) + } + return(collectri) } diff --git a/tests/testthat/test-omnipath.R b/tests/testthat/test-omnipath.R index 85eff7c..51342d5 100644 --- a/tests/testthat/test-omnipath.R +++ b/tests/testthat/test-omnipath.R @@ -35,4 +35,6 @@ test_that("test get_collectri", { testthat::expect_true(nrow(df) > 0) df_split <- get_collectri(split_complexes=TRUE) testthat::expect_true(nrow(df) < nrow(df_split)) + df_split <- get_collectri(load_meta=TRUE) + testthat::expect_true(ncol(df) < ncol(df_split)) }) From d405bb61313936e52a85c964bddaf43347c90413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=BCller-Dott?= Date: Thu, 11 Apr 2024 15:22:25 +0200 Subject: [PATCH 2/2] removed code duplication --- R/utils-omnipath.R | 45 ++++++++++++++-------------------- tests/testthat/test-omnipath.R | 4 +-- 2 files changed, 20 insertions(+), 29 deletions(-) diff --git a/R/utils-omnipath.R b/R/utils-omnipath.R index 0ec411a..5387976 100644 --- a/R/utils-omnipath.R +++ b/R/utils-omnipath.R @@ -165,37 +165,28 @@ get_collectri <- function(organism='human', split_complexes=FALSE, load_meta=FAL stringr::str_detect(source_genesymbol, "NFKB") ~ "NFKB") ) } - + + collectri <- base::rbind(collectri_interactions, collectri_complex) %>% + dplyr::distinct(source_genesymbol, target_genesymbol, + .keep_all = TRUE) %>% + dplyr::mutate(weight = dplyr::case_when( + is_stimulation == 1 ~ 1, + is_stimulation == 0 ~ -1 + )) %>% + dplyr::rename("source" = source_genesymbol, + "target" = target_genesymbol, + "mor" = weight) + if (!load_meta){ - collectri <- base::rbind(collectri_interactions, collectri_complex) %>% - dplyr::distinct(source_genesymbol, target_genesymbol, - .keep_all = TRUE) %>% - dplyr::mutate(weight = 
dplyr::case_when( - is_stimulation == 1 ~ 1, - is_stimulation == 0 ~ -1 - )) %>% - dplyr::select(source_genesymbol, target_genesymbol, - weight) %>% - dplyr::rename("source" = source_genesymbol, - "target" = target_genesymbol, - "mor" = weight) + collectri <- collectri %>% + dplyr::select(source, target, mor) } else { - collectri <- base::rbind(collectri_interactions, collectri_complex) %>% - dplyr::distinct(source_genesymbol, target_genesymbol, - .keep_all = TRUE) %>% - dplyr::mutate(weight = dplyr::case_when( - is_stimulation == 1 ~ 1, - is_stimulation == 0 ~ -1 - )) %>% - dplyr::select(source_genesymbol, target_genesymbol, - weight, sources, references, sign_decision, TF_category) %>% + collectri <- collectri %>% dplyr::mutate(references = stringr::str_extract_all(references, "\\d+")) %>% dplyr::mutate(references = purrr::map_chr(references, ~paste(.x, collapse = ";"))) %>% - dplyr::rename("source" = source_genesymbol, - "target" = target_genesymbol, - "mor" = weight, - "resources" = sources, - "PMIDs" = references) + dplyr::rename("resources" = sources, + "PMIDs" = references) %>% + dplyr::select(source, target, mor, resources, PMIDs, sign_decision, TF_category) } return(collectri) diff --git a/tests/testthat/test-omnipath.R b/tests/testthat/test-omnipath.R index 51342d5..6baf0e9 100644 --- a/tests/testthat/test-omnipath.R +++ b/tests/testthat/test-omnipath.R @@ -35,6 +35,6 @@ test_that("test get_collectri", { testthat::expect_true(nrow(df) > 0) df_split <- get_collectri(split_complexes=TRUE) testthat::expect_true(nrow(df) < nrow(df_split)) - df_split <- get_collectri(load_meta=TRUE) - testthat::expect_true(ncol(df) < ncol(df_split)) + df_meta <- get_collectri(load_meta=TRUE) + testthat::expect_true(ncol(df) < ncol(df_meta)) })