Add additional data.

neurogenomics · Apr 16, 2024 · 6a2f8ea · 6a2f8ea
1 parent 745e055
commit 6a2f8ea
Show file tree

Hide file tree

Showing 17 changed files with 4,090 additions and 28 deletions.
diff --git a/R/creb_motif.R b/R/creb_motif.R
@@ -1,8 +1,9 @@
 #' CREB Motif
 #'
-#' This dataset contains the MA0018.5 CREB1 motif from JASPAR2024.
+#' The MA0018.5 CREB1 motif from JASPAR2024.
 #'
 #' @format A universalmotif object produced by the \code{read_motif_file}
 #' function.
 #' @source JASPAR2024 \url{https://jaspar.elixir.no/matrix/MA0018.5/}
+#' @usage data("creb_motif")
 "creb_motif"
diff --git a/R/creb_peaks.R b/R/creb_peaks.R
@@ -1,18 +1,24 @@
 #' CREB Peaks
 #'
-#' This dataset contains a set of CREB1 TIP-seq peaks (narrowPeak) produced by
-#' MACS3 (first-mate, 5' shift). We have subset the peaks to reduce the data
-#' size (only chr19). The commands used to subset the data were:
+#' A set of CREB1 TIP-seq peaks (narrowPeak) produced by MACS3 (first-mate only,
+#' 5'-shift to centre the 5' end of each read). The BAM file used as input for
+#' the peak calling is not publicly available. The peak file has been subset
+#' to reduce the data size (only chromosome 19).
 #'
-#' creb_peaks <- read.table("path/to/creb_peaks")
-#' creb_peaks <- creb_peaks[creb_peaks$V1 == "chr19",]
-#' write.table(creb_peaks,
-#'             "creb_subset.narrowPeak",
-#'             row.names = FALSE,
-#'             col.names = FALSE,
-#'             quote = FALSE,
-#'             sep = "\t")
+#' The code used to prepare the data as an .rda file is:
+#'
+#' \code{creb_peaks <- read.table("path/to/creb_peaks.narrowPeak")} \cr
+#' \code{creb_peaks <- creb_peaks[creb_peaks$V1 == "chr19",]} \cr
+#' \code{write.table(creb_peaks,
+#'                   "inst/extdata/creb_subset.narrowPeak",
+#'                   row.names = FALSE,
+#'                   col.names = FALSE,
+#'                   quote = FALSE,
+#'                   sep = "\t")} \cr
+#' \code{creb_peaks <- read_peak_file("inst/extdata/creb_subset.narrowPeak")} \cr
+#' \code{usethis::use_data(creb_peaks, overwrite = TRUE)}
 #'
 #' @format A GRanges peak object outputted by the \code{read_peak_file}
 #' function.
+#' @usage data("creb_peaks")
 "creb_peaks"
diff --git a/R/ctcf_motif.R b/R/ctcf_motif.R
@@ -0,0 +1,9 @@
+#' CTCF Motif
+#'
+#' The MA1930.2 CTCF motif from JASPAR2024.
+#'
+#' @format A universalmotif object produced by the \code{read_motif_file}
+#' function.
+#' @source JASPAR2024 \url{https://jaspar.elixir.no/matrix/MA1930.2/}
+#' @usage data("ctcf_motif")
+"ctcf_motif"
diff --git a/R/ctcf_peaks.R b/R/ctcf_peaks.R
@@ -0,0 +1,28 @@
+#' CTCF Peaks
+#'
+#' A set of CTCF TIP-seq peaks (narrowPeak) produced by MACS3 (first-mate only,
+#' 5'-shift to centre the 5' end of each read). The BAM file used as input for
+#' peak calling was generated by Bartlett et al. (2021) and retrieved from NCBI
+#' GEO under accession GSE188512. Specifically, we used GSM5684367 (CTCF
+#' TIP-seq 5k cells). The peak file has been subset to reduce the data size
+#' (only chromosome 19).
+#'
+#' The code used to prepare the data as an .rda file is:
+#'
+#' \code{ctcf_peaks <- read.table("path/to/ctcf_peaks.narrowPeak")} \cr
+#' \code{ctcf_peaks <- ctcf_peaks[ctcf_peaks$V1 == "chr19",]} \cr
+#' \code{write.table(ctcf_peaks,
+#'                   "inst/extdata/ctcf_subset.narrowPeak",
+#'                   row.names = FALSE,
+#'                   col.names = FALSE,
+#'                   quote = FALSE,
+#'                   sep = "\t")} \cr
+#' \code{ctcf_peaks <- read_peak_file("inst/extdata/ctcf_subset.narrowPeak")} \cr
+#' \code{usethis::use_data(ctcf_peaks, overwrite = TRUE)} \cr
+#'
+#' @format A GRanges peak object outputted by the \code{read_peak_file}
+#' function.
+#' @source NCBI GEO (GSE188512)
+#' \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE188512}
+#' @usage data("ctcf_peaks")
+"ctcf_peaks"
diff --git a/data-raw/creb_motif.R b/data-raw/creb_motif.R
@@ -1,5 +1,5 @@
 creb_motif <- read_motif_file(
-  "MA0018.5.jaspar",
+  "inst/extdata/MA0018.5.jaspar",
   motif_id = "MA0018.5",
   file_format = "jaspar"
   )

diff --git a/data-raw/ctcf_motif.R b/data-raw/ctcf_motif.R
@@ -0,0 +1,6 @@
+ctcf_motif <- read_motif_file( # CTCF matrix from JASPAR2024
+  "inst/extdata/MA1930.2.jaspar",
+  motif_id = "MA1930.2", # accession only, no ".jaspar" suffix
+  file_format = "jaspar"
+)
+usethis::use_data(ctcf_motif, overwrite = TRUE) # save as package data
diff --git a/data-raw/ctcf_peaks.R b/data-raw/ctcf_peaks.R
@@ -0,0 +1,2 @@
+ctcf_peaks <- read_peak_file("inst/extdata/ctcf_subset.narrowPeak") # chr19-only peaks
+usethis::use_data(ctcf_peaks, overwrite = TRUE) # save as package data, replacing any existing copy
diff --git a/data/ctcf_motif.rda b/data/ctcf_motif.rda
diff --git a/data/ctcf_peaks.rda b/data/ctcf_peaks.rda
diff --git a/inst/extdata/MA1930.2.jaspar b/inst/extdata/MA1930.2.jaspar
@@ -0,0 +1,5 @@
+>MA1930.2	CTCF
+A  [   293    242     41     86   1986    445    158    346    453    497    714    536    467    664    563    593    452    560    604    178     38   1906    127    348   2179     32   1140    169     62    119    335   1021    279 ]
+C  [  1497    107     35   2129    127    366    278    324   1333   1129    794    855   1138    604    730    902    781    458    300   2074   2515     97   1409   1016     54      9     14     71     14    138   1833    164   1307 ]
+G  [   400    144   2476     25    155   1456    179    892    328    419    623    770    561    376    541    494    393   1041   1242    155      9    275    961    175    212   2527   1399   1208   2472   2117     86   1161    740 ]
+T  [   386   2083     24    336    308    309   1961   1014    462    531    445    415    410    932    742    587    950    517    430    169     14    298     79   1037    131      8     23   1128     28    202    322    230    250 ]