... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: GRaNIE |
2 | 2 |
Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data |
3 |
-Version: 1.1.10 |
|
3 |
+Version: 1.1.12 |
|
4 | 4 |
Encoding: UTF-8 |
5 | 5 |
Authors@R: c(person("Christian", "Arnold", email = |
6 | 6 |
"chrarnold@web.de", role = c("cre","aut")), |
... | ... |
@@ -27,28 +27,22 @@ Imports: |
27 | 27 |
RColorBrewer, |
28 | 28 |
ComplexHeatmap, |
29 | 29 |
DESeq2, |
30 |
- csaw, |
|
31 | 30 |
circlize, |
32 |
- robust, |
|
33 | 31 |
progress, |
34 | 32 |
utils, |
35 | 33 |
methods, |
36 | 34 |
stringr, |
37 | 35 |
scales, |
38 |
- BiocManager, |
|
39 |
- BiocParallel, |
|
40 | 36 |
igraph, |
41 | 37 |
S4Vectors, |
42 | 38 |
ggplot2, |
43 | 39 |
rlang, |
44 | 40 |
Biostrings, |
45 | 41 |
GenomeInfoDb, |
46 |
- IRanges, |
|
47 | 42 |
SummarizedExperiment, |
48 | 43 |
forcats, |
49 | 44 |
gridExtra, |
50 | 45 |
limma, |
51 |
- purrr, |
|
52 | 46 |
tidyselect, |
53 | 47 |
readr, |
54 | 48 |
grid, |
... | ... |
@@ -60,12 +54,10 @@ Imports: |
60 | 54 |
magrittr, |
61 | 55 |
tibble, |
62 | 56 |
viridis, |
63 |
- BiocFileCache, |
|
64 |
- colorspace |
|
57 |
+ colorspace, |
|
58 |
+ biomaRt |
|
65 | 59 |
Depends: |
66 |
- R (>= 4.2.0), |
|
67 |
- tidyverse, |
|
68 |
- topGO |
|
60 |
+ R (>= 4.2.0) |
|
69 | 61 |
Suggests: |
70 | 62 |
knitr, |
71 | 63 |
BSgenome.Hsapiens.UCSC.hg19, |
... | ... |
@@ -79,13 +71,17 @@ Suggests: |
79 | 71 |
org.Hs.eg.db, |
80 | 72 |
org.Mm.eg.db, |
81 | 73 |
IHW, |
82 |
- biomaRt, |
|
83 | 74 |
clusterProfiler, |
84 | 75 |
ReactomePA, |
85 | 76 |
DOSE, |
77 |
+ BiocFileCache, |
|
86 | 78 |
ChIPseeker, |
87 | 79 |
testthat (>= 3.0.0), |
88 |
- BiocStyle |
|
80 |
+ BiocStyle, |
|
81 |
+ csaw, |
|
82 |
+ BiocParallel, |
|
83 |
+ topGO, |
|
84 |
+ robust |
|
89 | 85 |
VignetteBuilder: knitr |
90 | 86 |
biocViews: Software, GeneExpression, GeneRegulation, NetworkInference, GeneSetEnrichment, BiomedicalInformatics, Genetics, Transcriptomics, ATACSeq, RNASeq, GraphAndNetwork, Regression, Transcription, ChIPSeq |
91 | 87 |
License: Artistic-2.0 |
... | ... |
@@ -24,6 +24,7 @@ export(initializeGRN) |
24 | 24 |
export(loadExampleObject) |
25 | 25 |
export(nGenes) |
26 | 26 |
export(nPeaks) |
27 |
+export(nTFs) |
|
27 | 28 |
export(overlapPeaksAndTFBS) |
28 | 29 |
export(performAllNetworkAnalyses) |
29 | 30 |
export(plotCommunitiesEnrichment) |
... | ... |
@@ -36,8 +37,6 @@ export(plotPCA_all) |
36 | 37 |
export(plotTFEnrichment) |
37 | 38 |
export(plot_stats_connectionSummary) |
38 | 39 |
export(visualizeGRN) |
39 |
-import(BiocFileCache) |
|
40 |
-import(BiocManager) |
|
41 | 40 |
import(GenomicRanges) |
42 | 41 |
import(checkmate) |
43 | 42 |
import(ggplot2) |
... | ... |
@@ -45,11 +44,11 @@ import(grDevices) |
45 | 44 |
import(graphics) |
46 | 45 |
import(patchwork) |
47 | 46 |
import(tibble) |
48 |
-import(tidyverse) |
|
49 |
-import(topGO) |
|
50 | 47 |
import(utils) |
51 | 48 |
importFrom(GenomicRanges,mcols) |
52 |
-importFrom(IRanges,width) |
|
49 |
+importFrom(Matrix,Matrix) |
|
50 |
+importFrom(biomaRt,getBM) |
|
51 |
+importFrom(biomaRt,useEnsembl) |
|
53 | 52 |
importFrom(circlize,colorRamp2) |
54 | 53 |
importFrom(grid,gpar) |
55 | 54 |
importFrom(magrittr,`%>%`) |
... | ... |
@@ -1,3 +1,19 @@ |
1 |
+# GRaNIE 1.1.12 (2022-09-13) |
|
2 |
+ |
|
3 |
+## Major changes |
|
4 |
+ |
|
5 |
+- many Documentation and R help updates, Package Details Vignette is online |
|
6 |
+- The workflow vignette is now improved: better figure resolution, figure aspect ratios are optimized, and a few other changes |
|
7 |
+- the eGRN graph structure as built by `build_eGRN_graph()` in the `GRaNIE` object is now reset whenever the function `filterGRNAndConnectGenes()` is successfully executed to make sure that enrichment functions etc are not using an outdated graph structure. |
|
8 |
+- the landing page of the website has been extended and overhauled |
|
9 |
+- removed some dependency packages and moved others into `Suggests` to lower the installation burden of the package. In addition, removed `topGO` from the `Depends` section (now in `Suggests`) and removed `tidyverse` altogether (before in `Depends`). Detailed explanations when and how the packages listed under `Suggests` are needed can now be found in the new [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation). |
|
10 |
+- new helper function `installSuggestedPackages()` to install all packages listed under `Suggests` |
|
11 |
+ |
|
12 |
+## Minor changes |
|
13 |
+ |
|
14 |
+- many small changes in the code, better argument checking, and preparing rigorous unit test inclusion |
|
15 |
+- internally renaming the (recently changed / renamed) gene type `lncRNA` from `biomaRt` to `lincRNA` to be compatible with older versions of `GRaNIE` |
|
16 |
+ |
|
1 | 17 |
|
2 | 18 |
# GRaNIE 1.1.X (2022-05-31) |
3 | 19 |
|
... | ... |
@@ -202,7 +202,7 @@ setMethod("show", |
202 | 202 |
cat(" Communities (TF-gene):\n") |
203 | 203 |
df = igraph::vertex.attributes(GRN@graph[["TF_gene"]]$graph) |
204 | 204 |
if (!is.null(df) & "community" %in% names(df)) { |
205 |
- communities = df %>% as.data.frame() %>% dplyr::count(community) %>% dplyr::arrange(desc(n)) |
|
205 |
+ communities = df %>% as.data.frame() %>% dplyr::count(community) %>% dplyr::arrange(dplyr::desc(.data$n)) |
|
206 | 206 |
cat(" Communities, sorted by size (n = Number of nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "") |
207 | 207 |
} else { |
208 | 208 |
cat(" None found\n") |
... | ... |
@@ -241,11 +241,13 @@ setMethod("show", |
241 | 241 |
|
242 | 242 |
#' Get the number of peaks for a \code{\linkS4class{GRN}} object. |
243 | 243 |
#' |
244 |
-#' Return the number of peaks (all or only non-filtered ones) that are defined in the \code{\linkS4class{GRN}} object. |
|
244 |
+#' Returns the number of peaks (all or only non-filtered ones) from the provided peak datain the \code{\linkS4class{GRN}} object. |
|
245 | 245 |
#' |
246 | 246 |
#' @template GRN |
247 | 247 |
#' @param filter TRUE or FALSE. Default TRUE. Should peaks marked as filtered be included in the count? |
248 | 248 |
#' @return Integer. Number of peaks that are defined in the \code{\linkS4class{GRN}} object, either by excluding (filter = TRUE) or including (filter = FALSE) peaks that are currently marked as \emph{filtered}. |
249 |
+#' @seealso \code{\link{nTFs}} |
|
250 |
+#' @seealso \code{\link{nGenes}} |
|
249 | 251 |
#' @examples |
250 | 252 |
#' # See the Workflow vignette on the GRaNIE website for examples |
251 | 253 |
#' GRN = loadExampleObject() |
... | ... |
@@ -253,7 +255,6 @@ setMethod("show", |
253 | 255 |
#' nPeaks(GRN, filter = FALSE) |
254 | 256 |
#' @export |
255 | 257 |
#' @aliases peaks |
256 |
-#' @rdname peaks-methods |
|
257 | 258 |
nPeaks <- function(GRN, filter = TRUE) { |
258 | 259 |
|
259 | 260 |
checkmate::assertClass(GRN, "GRN") |
... | ... |
@@ -276,11 +277,13 @@ nPeaks <- function(GRN, filter = TRUE) { |
276 | 277 |
|
277 | 278 |
#' Get the number of genes for a \code{\linkS4class{GRN}} object. |
278 | 279 |
#' |
279 |
-#' Return the number of genes (all or only non-filtered ones) that are defined in the \code{\linkS4class{GRN}} object. |
|
280 |
+#' Returns the number of genes (all or only non-filtered ones) from the provided RNA-seq data in the \code{\linkS4class{GRN}} object. |
|
280 | 281 |
#' |
281 | 282 |
#' @template GRN |
282 | 283 |
#' @param filter TRUE or FALSE. Default TRUE. Should genes marked as filtered be included in the count? |
283 | 284 |
#' @return Integer. Number of genes that are defined in the \code{\linkS4class{GRN}} object, either by excluding (filter = TRUE) or including (filter = FALSE) genes that are currently marked as \emph{filtered}. |
285 |
+#' @seealso \code{\link{nTFs}} |
|
286 |
+#' @seealso \code{\link{nPeaks}} |
|
284 | 287 |
#' @examples |
285 | 288 |
#' # See the Workflow vignette on the GRaNIE website for examples |
286 | 289 |
#' GRN = loadExampleObject() |
... | ... |
@@ -288,7 +291,6 @@ nPeaks <- function(GRN, filter = TRUE) { |
288 | 291 |
#' nGenes(GRN, filter = FALSE) |
289 | 292 |
#' @export |
290 | 293 |
#' @aliases genes |
291 |
-#' @rdname genes-methods |
|
292 | 294 |
nGenes <- function(GRN, filter = TRUE) { |
293 | 295 |
|
294 | 296 |
checkmate::assertClass(GRN, "GRN") |
... | ... |
@@ -306,4 +308,32 @@ nGenes <- function(GRN, filter = TRUE) { |
306 | 308 |
|
307 | 309 |
nGenes |
308 | 310 |
|
311 |
+} |
|
312 |
+ |
|
313 |
+ |
|
314 |
+ |
|
315 |
+#' Get the number of TFs for a \code{\linkS4class{GRN}} object. |
|
316 |
+#' |
|
317 |
+#' Returns the number of TFs from the provided TFBS data in the \code{\linkS4class{GRN}} object. |
|
318 |
+#' |
|
319 |
+#' @template GRN |
|
320 |
+#' @return Integer. Number of TFs that are defined in the \code{\linkS4class{GRN}} object. |
|
321 |
+#' @seealso \code{\link{nGenes}} |
|
322 |
+#' @seealso \code{\link{nPeaks}} |
|
323 |
+#' @examples |
|
324 |
+#' # See the Workflow vignette on the GRaNIE website for examples |
|
325 |
+#' GRN = loadExampleObject() |
|
326 |
+#' nTFs(GRN) |
|
327 |
+#' @export |
|
328 |
+#' @aliases TFs |
|
329 |
+nTFs <- function(GRN) { |
|
330 |
+ |
|
331 |
+ checkmate::assertClass(GRN, "GRN") |
|
332 |
+ |
|
333 |
+ if (is.null(GRN@data$TFs$translationTable)) { |
|
334 |
+ return(NA) |
|
335 |
+ } |
|
336 |
+ |
|
337 |
+ nrow(GRN@data$TFs$translationTable) |
|
338 |
+ |
|
309 | 339 |
} |
310 | 340 |
\ No newline at end of file |
... | ... |
@@ -1,24 +1,42 @@ |
1 | 1 |
######## Init GRN ######## |
2 | 2 |
|
3 |
-#' Initialize a \code{\linkS4class{GRN}} object |
|
3 |
+#' Create and initialize a \code{\linkS4class{GRN}} object. |
|
4 |
+#' |
|
5 |
+#' Executing this function is the very first step in the *GRaNIE* workflow. After its execution, data can be added to the object. |
|
6 |
+#' \strong{Depending on the genome assembly version, additional genome annotation packages are required, as follows:} |
|
7 |
+#' For \code{hg19} and \code{hg38}, |
|
8 |
+#' the packages \code{org.Hs.eg.db} as well as \code{BSgenome.Hsapiens.UCSC.hg19}+\code{TxDb.Hsapiens.UCSC.hg19.knownGene} or |
|
9 |
+#' \code{BSgenome.Hsapiens.UCSC.hg38}+\code{TxDb.Hsapiens.UCSC.hg38.knownGene} are required, respectively. |
|
10 |
+#' For \code{mm9} and \code{mm10}, |
|
11 |
+#' the packages \code{org.Mm.eg.db} as well as \code{BSgenome.Mmusculus.UCSC.mm9}+\code{TxDb.Mmusculus.UCSC.mm9.knownGene} or |
|
12 |
+#' \code{Mmusculus.UCSC.mm10}+\code{TxDb.Mmusculus.UCSC.mm10.knownGene} are required, respectively. |
|
13 |
+#' For more information, see the error message if any of these packages is missing or the |
|
14 |
+#' \href{https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation}{Package Details Vignette}. |
|
4 | 15 |
#' |
5 | 16 |
#' @export |
6 |
-#' @param objectMetadata List. Default \code{list()}. Optional (named) list with an arbitrary number of elements, all of which capture metadata for the object. This is mainly used to distinguish GRN objects from one another by storing object-specific metadata along with the data. |
|
7 |
-#' @param outputFolder Output folder, either absolute or relative to the current working directory. No default. Default output folder where all pipeline output will be put unless specified otherwise. We recommend specifying an absolute path. Note that for Windows-based systems, the path must be correctly specified with "/" as path separator. |
|
8 |
-#' @param genomeAssembly Character. No default. The genome assembly of all data that to be used within this object. Currently, supported genomes are: \code{hg19}, \code{hg38}, and \code{mm10}. |
|
17 |
+#' @param objectMetadata List. Default \code{list()}. Optional (named) list with an arbitrary number of elements, all of which |
|
18 |
+#' capture metadata for the object. This is mainly used to distinguish GRN objects from one another by storing object-specific metadata along with the data. |
|
19 |
+#' @param outputFolder Output folder, either absolute or relative to the current working directory. Default \code{"."}. |
|
20 |
+#' Default output folder where all pipeline output will be put unless specified otherwise. We recommend specifying an absolute path. |
|
21 |
+#' Note that for Windows-based systems, the path must be correctly specified with "/" as path separator. |
|
22 |
+#' @param genomeAssembly Character. No default. The genome assembly of all data that to be used within this object. |
|
23 |
+#' Currently, supported genomes are: \code{hg19}, \code{hg38}, \code{mm9} and \code{mm10}. See function description for further information and notes. |
|
9 | 24 |
#' @return Empty \code{\linkS4class{GRN}} object |
10 | 25 |
#' @examples |
11 | 26 |
#' meta.l = list(name = "exampleName", date = "01.03.22") |
12 | 27 |
#' GRN = initializeGRN(objectMetadata = meta.l, outputFolder = "output", genomeAssembly = "hg38") |
13 | 28 |
#' @export |
14 |
-#' @import tidyverse |
|
15 | 29 |
#' @importFrom stats sd median cor cor.test quantile |
16 | 30 |
initializeGRN <- function(objectMetadata = list(), |
17 |
- outputFolder, |
|
31 |
+ outputFolder = ".", |
|
18 | 32 |
genomeAssembly) { |
19 | 33 |
|
34 |
+ start = Sys.time() |
|
35 |
+ |
|
20 | 36 |
checkmate::assert(checkmate::checkNull(objectMetadata), checkmate::checkList(objectMetadata)) |
21 |
- checkmate::assertSubset(genomeAssembly, c("hg19","hg38", "mm10")) |
|
37 |
+ checkmate::assertChoice(genomeAssembly, c("hg19","hg38", "mm9", "mm10")) |
|
38 |
+ |
|
39 |
+ .checkAndLoadPackagesGenomeAssembly(genomeAssembly) |
|
22 | 40 |
|
23 | 41 |
# Create the folder first if not yet existing |
24 | 42 |
if (!dir.exists(outputFolder)) { |
... | ... |
@@ -51,8 +69,7 @@ initializeGRN <- function(objectMetadata = list(), |
51 | 69 |
packageName = utils::packageName() |
52 | 70 |
par.l$packageVersion = ifelse(is.null(packageName), NA, paste0(utils::packageVersion(packageName)[[1]], collapse = ".")) |
53 | 71 |
par.l$genomeAssembly = genomeAssembly |
54 |
- |
|
55 |
- .checkAndLoadPackagesGenomeAssembly(genomeAssembly) |
|
72 |
+ |
|
56 | 73 |
|
57 | 74 |
# Make an internal subslot |
58 | 75 |
par.l$internal = list() |
... | ... |
@@ -92,29 +109,47 @@ initializeGRN <- function(objectMetadata = list(), |
92 | 109 |
|
93 | 110 |
futile.logger::flog.info(paste0("Empty GRN object created successfully. Type the object name (e.g., GRN) to retrieve summary information about it at any time.")) |
94 | 111 |
|
112 |
+ .printExecutionTime(start, prefix = "") |
|
95 | 113 |
|
96 | 114 |
GRN |
97 | 115 |
} |
98 | 116 |
|
99 | 117 |
######## Add and filter data ######## |
100 | 118 |
|
101 |
-#' Add data to a \code{\linkS4class{GRN}} object |
|
119 |
+#' Add data to a \code{\linkS4class{GRN}} object. |
|
120 |
+#' |
|
121 |
+#' This function adds both RNA and peak data to a \code{\linkS4class{GRN}} object, along with data normalization. |
|
122 |
+#' In addition, and highly recommended, sample metadata can be optionally provided. |
|
123 |
+#' |
|
124 |
+#' If the \code{ChIPseeker} package is installed, additional peak annotation is provided in the annotation slot and a peak annotation QC plot is produced as part of peak-gene QC. |
|
125 |
+#' This is fully optional, however, and has no consequences for downstream functions. |
|
102 | 126 |
#' |
103 | 127 |
#' @export |
104 | 128 |
#' @template GRN |
105 |
-#' @param counts_peaks Data frame. No default. Counts for the peaks, with raw or normalized counts for each peak (rows) across all samples (columns). In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_peaks} below. Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table. |
|
106 |
-#' @param normalization_peaks Character. Default \code{DESeq_sizeFactor}. Normalization procedure for peak data. Must be one of \code{DESeq_sizeFactor}, \code{none}, or \code{quantile}. |
|
107 |
-#' @param idColumn_peaks Character. Default \code{peakID}. Name of the column in the counts_peaks data frame that contains peak IDs. The required format must be {chr}:{start}-{end}", with {chr} denoting the abbreviated chromosome name, and {start} and {end} the begin and end of the peak coordinates, respectively. End must be bigger than start. Examples for valid peak IDs are \code{chr1:400-800} or \code{chrX:20-25}. |
|
108 |
-#' @param counts_rna Data frame. No default. Counts for the RNA-seq data, with raw or normalized counts for each gene (rows) across all samples (columns). In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_rna} below. Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table. |
|
109 |
-#' @param normalization_rna Character. Default \code{quantile}. Normalization procedure for peak data. Must be one of "DESeq_sizeFactor", "none", or "quantile" |
|
129 |
+#' @param counts_peaks Data frame. No default. Counts for the peaks, with raw or normalized counts for each peak (rows) across all samples (columns). |
|
130 |
+#' In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_peaks} below. |
|
131 |
+#' Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table. |
|
132 |
+#' @param normalization_peaks Character. Default \code{DESeq_sizeFactor}. |
|
133 |
+#' Normalization procedure for peak data. Must be one of \code{DESeq_sizeFactor}, \code{none}, or \code{quantile}. |
|
134 |
+#' @param idColumn_peaks Character. Default \code{peakID}. Name of the column in the counts_peaks data frame that contains peak IDs. |
|
135 |
+#' The required format must be {chr}:{start}-{end}", with {chr} denoting the abbreviated chromosome name, and {start} and {end} the begin and end |
|
136 |
+#' of the peak coordinates, respectively. End must be bigger than start. Examples for valid peak IDs are \code{chr1:400-800} or \code{chrX:20-25}. |
|
137 |
+#' @param counts_rna Data frame. No default. Counts for the RNA-seq data, with raw or normalized counts for each gene (rows) across all samples (columns). |
|
138 |
+#' In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_rna} below. |
|
139 |
+#' Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table. |
|
140 |
+#' @param normalization_rna Character. Default \code{quantile}. Normalization procedure for peak data. |
|
141 |
+#' Must be one of "DESeq_sizeFactor", "none", or "quantile". For "quantile", \code{limma::normalizeQuantiles} is used for normalization. |
|
110 | 142 |
#' @param idColumn_RNA Character. Default \code{ENSEMBL}. Name of the column in the \code{counts_rna} data frame that contains Ensembl IDs. |
111 |
-#' @param sampleMetadata Data frame. Default \code{NULL}. Optional, additional metadata for the samples, such as age, sex, gender etc. If provided, the @seealso [plotPCA_all()] function can then incorporate and plot it. Sample names must match with those from both peak and RNA-Seq data. The first column is expected to contain the sample IDs, the actual column name is irrelevant. |
|
112 |
-#' @param allowOverlappingPeaks \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should overlapping peaks be allowed (then only a warning is issued when overlapping peaks are found) or (the default) should an error be raised? |
|
143 |
+#' @param sampleMetadata Data frame. Default \code{NULL}. Optional, additional metadata for the samples, such as age, sex, gender etc. |
|
144 |
+#' If provided, the @seealso [plotPCA_all()] function can then incorporate and plot it. Sample names must match with those from both peak and RNA-Seq data. The first column is expected to contain the sample IDs, the actual column name is irrelevant. |
|
145 |
+#' @param allowOverlappingPeaks \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should overlapping peaks be allowed (then only a warning is issued |
|
146 |
+#' when overlapping peaks are found) or (the default) should an error be raised? |
|
113 | 147 |
#' @template forceRerun |
114 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
148 |
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function (e.g., slots \code{GRN@data$peaks} and \code{GRN@data$RNA}) |
|
149 |
+#' @seealso \code{\link{plotPCA_all}} |
|
115 | 150 |
#' @examples |
116 | 151 |
#' # See the Workflow vignette on the GRaNIE website for examples |
117 |
-#' # library(tidyverse) |
|
152 |
+#' # library(readr) |
|
118 | 153 |
#' # rna.df = read_tsv("https://www.embl.de/download/zaugg/GRaNIE/rna.tsv.gz") |
119 | 154 |
#' # peaks.df = read_tsv("https://www.embl.de/download/zaugg/GRaNIE/peaks.tsv.gz") |
120 | 155 |
#' # meta.df = read_tsv("https://www.embl.de/download/zaugg/GRaNIE/sampleMetadata.tsv.gz") |
... | ... |
@@ -127,15 +162,18 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
127 | 162 |
allowOverlappingPeaks= FALSE, |
128 | 163 |
forceRerun = FALSE) { |
129 | 164 |
|
130 |
- GRN = .addFunctionLogToObject(GRN) |
|
165 |
+ start = Sys.time() |
|
131 | 166 |
|
132 | 167 |
checkmate::assertClass(GRN, "GRN") |
168 |
+ GRN = .addFunctionLogToObject(GRN) |
|
169 |
+ |
|
133 | 170 |
checkmate::assertDataFrame(counts_peaks, min.rows = 1, min.cols = 2) |
134 | 171 |
checkmate::assertDataFrame(counts_rna, min.rows = 1, min.cols = 2) |
135 | 172 |
checkmate::assertCharacter(idColumn_peaks, min.chars = 1, len = 1) |
136 | 173 |
checkmate::assertCharacter(idColumn_RNA, min.chars = 1, len = 1) |
137 | 174 |
checkmate::assertChoice(normalization_peaks, c("none", "DESeq_sizeFactor", "quantile")) |
138 | 175 |
checkmate::assertChoice(normalization_rna, c("none", "DESeq_sizeFactor", "quantile")) |
176 |
+ checkmate::assertFlag(allowOverlappingPeaks) |
|
139 | 177 |
checkmate::assertFlag(forceRerun) |
140 | 178 |
|
141 | 179 |
if (is.null(GRN@data$peaks$counts_norm) | |
... | ... |
@@ -328,6 +366,8 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
328 | 366 |
# Add gene annotation once |
329 | 367 |
GRN = .populateGeneAnnotation(GRN) |
330 | 368 |
|
369 |
+ .printExecutionTime(start, prefix = "") |
|
370 |
+ |
|
331 | 371 |
GRN |
332 | 372 |
|
333 | 373 |
} |
... | ... |
@@ -369,10 +409,10 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
369 | 409 |
paste0(ids_chr, ":", format(as.integer(start), scientific = FALSE), "-", format(as.integer(end), scientific= FALSE)) |
370 | 410 |
} |
371 | 411 |
|
372 |
- |
|
412 |
+#' @importFrom biomaRt useEnsembl getBM |
|
373 | 413 |
.retrieveAnnotationData <- function(genomeAssembly) { |
374 | 414 |
|
375 |
- futile.logger::flog.info(paste0("Retrieving genome annotation data from biomaRt... This may take a while")) |
|
415 |
+ futile.logger::flog.info(paste0("Retrieving genome annotation data from biomaRt for ", genomeAssembly, "... This may take a while")) |
|
376 | 416 |
|
377 | 417 |
params.l = .getBiomartParameters(genomeAssembly) |
378 | 418 |
|
... | ... |
@@ -407,7 +447,9 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
407 | 447 |
dplyr::mutate(chromosome_name = paste0("chr", chromosome_name)) %>% |
408 | 448 |
dplyr::rename(gene.chr = chromosome_name, gene.start = start_position, gene.end = end_position, |
409 | 449 |
gene.strand = strand, gene.ENSEMBL = ensembl_gene_id, gene.type = gene_biotype, gene.name = external_gene_name) %>% |
450 |
+ tidyr::replace_na(list(gene.type = "unknown")) %>% |
|
410 | 451 |
dplyr::mutate_if(is.character, as.factor) %>% |
452 |
+ dplyr::mutate(gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) %>% # there seems to be a name change from lincRNA -> lncRNA, lets change it here |
|
411 | 453 |
dplyr::mutate(gene.strand = factor(gene.strand, levels = c(1,-1), labels = c("+", "-"))) |
412 | 454 |
|
413 | 455 |
} |
... | ... |
@@ -458,11 +500,11 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
458 | 500 |
peak.CV = CV_peaks) |
459 | 501 |
|
460 | 502 |
GRN@annotation$consensusPeaks = metadata_peaks |
461 |
- |
|
503 |
+ |
|
462 | 504 |
if (!is.installed("ChIPseeker")) { |
463 |
- message = paste0("The package ChIPseeker is currently not installed, which is needed for additional peak annotation that can be useful for further downstream analyses. ", |
|
464 |
- " You may want to install it and re-run this function. However, this is optional and except for some missing additional annotation columns, there is no limitation.") |
|
465 |
- .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
|
505 |
+ packageMessage = paste0("The package ChIPseeker is currently not installed, which is needed for additional peak annotation that can be useful for further downstream analyses. ", |
|
506 |
+ " You may want to install it and re-run this function. However, this is optional and except for some missing additional annotation columns, there is no limitation.") |
|
507 |
+ .checkPackageInstallation("ChIPseeker", packageMessage, isWarning = TRUE) |
|
466 | 508 |
} else { |
467 | 509 |
|
468 | 510 |
futile.logger::flog.info(paste0(" Retrieve peak annotation using ChipSeeker. This may take a while")) |
... | ... |
@@ -545,7 +587,8 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
545 | 587 |
gene.mean = rowMeans_rna, |
546 | 588 |
gene.median = rowMedians_rna, |
547 | 589 |
gene.CV = CV_rna) %>% |
548 |
- dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) |
|
590 |
+ dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>% |
|
591 |
+ dplyr::mutate(gene.type = forcats::fct_explicit_na(gene.type, na_level = "unknown/missing")) |
|
549 | 592 |
|
550 | 593 |
GRN@annotation$genes = metadata_rna |
551 | 594 |
|
... | ... |
@@ -560,7 +603,6 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
560 | 603 |
|
561 | 604 |
} |
562 | 605 |
|
563 |
-#' @importFrom IRanges width |
|
564 | 606 |
#' @importFrom rlang .data `:=` |
565 | 607 |
.calcGCContentPeaks <- function(GRN) { |
566 | 608 |
|
... | ... |
@@ -582,7 +624,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
582 | 624 |
|
583 | 625 |
GC_content.df = GC_content.df %>% |
584 | 626 |
tibble::as_tibble() %>% |
585 |
- dplyr::mutate(length = IRanges::width(query), |
|
627 |
+ dplyr::mutate(length = GenomicRanges::width(query), |
|
586 | 628 |
peak.ID = query$peakID, |
587 | 629 |
GC_class = cut(`G|C`, breaks = seq(0,1,0.1), include.lowest = TRUE, ordered_result = TRUE)) |
588 | 630 |
|
... | ... |
@@ -611,22 +653,22 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
611 | 653 |
GRN |
612 | 654 |
} |
613 | 655 |
|
614 |
-#' Filter data from a \code{\linkS4class{GRN}} object |
|
656 |
+#' Filter RNA-seq and/or peak data from a \code{\linkS4class{GRN}} object |
|
615 | 657 |
#' |
616 | 658 |
#' @template GRN |
617 |
-#' @param minNormalizedMean_peaks Numeric or \code{NULL}. Default 5. Minimum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
618 |
-#' @param maxNormalizedMean_peaks Numeric or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
619 |
-#' @param minNormalizedMeanRNA Numeric or \code{NULL}. Default 5. Minimum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
620 |
-#' @param maxNormalizedMeanRNA Numeric or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
659 |
+#' @param minNormalizedMean_peaks Numeric[0,] or \code{NULL}. Default 5. Minimum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
660 |
+#' @param maxNormalizedMean_peaks Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
661 |
+#' @param minNormalizedMeanRNA Numeric[0,] or \code{NULL}. Default 5. Minimum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
662 |
+#' @param maxNormalizedMeanRNA Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter. |
|
621 | 663 |
#' @param chrToKeep_peaks Character vector or \code{NULL}. Default \code{NULL}. Vector of chromosomes that peaks are allowed to come from. This filter can be used to filter sex chromosomes from the peaks, for example (e.g, \code{c(paste0("chr", 1:22), "chrX", "chrY")}) |
622 |
-#' @param minSize_peaks Integer or \code{NULL}. Default \code{NULL}. Minimum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
623 |
-#' @param maxSize_peaks Integer or \code{NULL}. Default 10000. Maximum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
624 |
-#' @param minCV_peaks Numeric or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
625 |
-#' @param maxCV_peaks Numeric or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
626 |
-#' @param minCV_genes Numeric or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter. |
|
627 |
-#' @param maxCV_genes Numeric or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter. |
|
664 |
+#' @param minSize_peaks Integer[1,] or \code{NULL}. Default \code{NULL}. Minimum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
665 |
+#' @param maxSize_peaks Integer[1,] or \code{NULL}. Default 10000. Maximum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
666 |
+#' @param minCV_peaks Numeric[0,] or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
667 |
+#' @param maxCV_peaks Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter. |
|
668 |
+#' @param minCV_genes Numeric[0,] or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter. |
|
669 |
+#' @param maxCV_genes Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter. |
|
628 | 670 |
#' @template forceRerun |
629 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
671 |
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function. |
|
630 | 672 |
#' @examples |
631 | 673 |
#' # See the Workflow vignette on the GRaNIE website for examples |
632 | 674 |
#' GRN = loadExampleObject() |
... | ... |
@@ -640,9 +682,12 @@ filterData <- function (GRN, |
640 | 682 |
minCV_peaks = NULL, maxCV_peaks = NULL, |
641 | 683 |
minCV_genes = NULL, maxCV_genes = NULL, |
642 | 684 |
forceRerun = FALSE) { |
643 |
- GRN = .addFunctionLogToObject(GRN) |
|
644 | 685 |
|
686 |
+ start = Sys.time() |
|
687 |
+ |
|
645 | 688 |
checkmate::assertClass(GRN, "GRN") |
689 |
+ GRN = .addFunctionLogToObject(GRN) |
|
690 |
+ |
|
646 | 691 |
checkmate::assertNumber(minNormalizedMean_peaks, lower = 0, null.ok = TRUE) |
647 | 692 |
checkmate::assertNumber(minNormalizedMeanRNA, lower = 0, null.ok = TRUE) |
648 | 693 |
checkmate::assertNumber(maxNormalizedMean_peaks, lower = minNormalizedMean_peaks , null.ok = TRUE) |
... | ... |
@@ -717,6 +762,8 @@ filterData <- function (GRN, |
717 | 762 |
GRN@config$isFiltered = TRUE |
718 | 763 |
} |
719 | 764 |
|
765 |
+ .printExecutionTime(start, prefix = "") |
|
766 |
+ |
|
720 | 767 |
GRN |
721 | 768 |
} |
722 | 769 |
|
... | ... |
@@ -887,17 +934,18 @@ filterData <- function (GRN, |
887 | 934 |
#' @param TFs Character vector. Default \code{all}. Vector of TF names to include. The special keyword \code{all} can be used to include all TF found in the folder as specified by \code{motifFolder}. If \code{all} is specified anywhere, all TFs will be included. TF names must otherwise match the file names that are found in the folder, without the file suffix. |
888 | 935 |
#' @param filesTFBSPattern Character. Default \code{"_TFBS"}. Suffix for the file names in the TFBS folder that is not part of the TF name. Can be empty. For example, for the TF CTCF, if the file is called \code{CTCF.all.TFBS.bed}, set this parameter to \code{".all.TFBS"}. |
889 | 936 |
#' @param fileEnding Character. Default \code{".bed"}. File ending for the files from the motif folder. |
890 |
-#' @param nTFMax \code{NULL} or integer. Default \code{NULL}. Maximal number of TFs to import. Can be used for testing purposes, e.g., setting to 5 only imports 5 TFs even though the whole \code{motifFolder} has many more TFs defined. |
|
937 |
+#' @param nTFMax \code{NULL} or Integer[1,]. Default \code{NULL}. Maximal number of TFs to import. Can be used for testing purposes, e.g., setting to 5 only imports 5 TFs even though the whole \code{motifFolder} has many more TFs defined. |
|
891 | 938 |
#' @template forceRerun |
892 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
939 |
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function (\code{GRN@data$TFs$translationTable} in particular) |
|
893 | 940 |
#' @examples |
894 | 941 |
#' # See the Workflow vignette on the GRaNIE website for examples |
895 | 942 |
#' @export |
896 | 943 |
addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPattern = "_TFBS", fileEnding = ".bed", forceRerun = FALSE) { |
897 | 944 |
|
898 |
- GRN = .addFunctionLogToObject(GRN) |
|
899 |
- |
|
945 |
+ start = Sys.time() |
|
900 | 946 |
checkmate::assertClass(GRN, "GRN") |
947 |
+ GRN = .addFunctionLogToObject(GRN) |
|
948 |
+ |
|
901 | 949 |
checkmate::assertDirectoryExists(motifFolder) |
902 | 950 |
checkmate::assertCharacter(TFs, min.len = 1) |
903 | 951 |
checkmate::assert(checkmate::testNull(nTFMax), checkmate::testIntegerish(nTFMax, lower = 1)) |
... | ... |
@@ -927,6 +975,8 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte |
927 | 975 |
|
928 | 976 |
} |
929 | 977 |
|
978 |
+ .printExecutionTime(start, prefix = "") |
|
979 |
+ |
|
930 | 980 |
GRN |
931 | 981 |
|
932 | 982 |
} |
... | ... |
@@ -1006,7 +1056,7 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte |
1006 | 1056 |
#' @template GRN |
1007 | 1057 |
#' @template nCores |
1008 | 1058 |
#' @template forceRerun |
1009 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
1059 |
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function (\code{GRN@data$TFs$TF_peak_overlap} in particular) |
|
1010 | 1060 |
#' @examples |
1011 | 1061 |
#' # See the Workflow vignette on the GRaNIE website for examples |
1012 | 1062 |
#' GRN = loadExampleObject() |
... | ... |
@@ -1014,9 +1064,11 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte |
1014 | 1064 |
#' @export |
1015 | 1065 |
overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1016 | 1066 |
|
1067 |
+ start = Sys.time() |
|
1068 |
+ |
|
1069 |
+ checkmate::assertClass(GRN, "GRN") |
|
1017 | 1070 |
GRN = .addFunctionLogToObject(GRN) |
1018 | 1071 |
|
1019 |
- checkmate::assertClass(GRN, "GRN") |
|
1020 | 1072 |
checkmate::assertIntegerish(nCores, lower = 1) |
1021 | 1073 |
checkmate::assertFlag(forceRerun) |
1022 | 1074 |
|
... | ... |
@@ -1092,6 +1144,8 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1092 | 1144 |
|
1093 | 1145 |
} |
1094 | 1146 |
|
1147 |
+ .printExecutionTime(start, prefix = "") |
|
1148 |
+ |
|
1095 | 1149 |
GRN |
1096 | 1150 |
} |
1097 | 1151 |
|
... | ... |
@@ -1140,7 +1194,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1140 | 1194 |
dplyr::group_by(peakID) %>% |
1141 | 1195 |
dplyr::slice(which.min(distance)) %>% |
1142 | 1196 |
#arrange(distance, .by_group = TRUE) %>% |
1143 |
- # top_n(n = 2, desc(distance)) %>% |
|
1197 |
+ # top_n(n = 2, dplyr::desc(distance)) %>% |
|
1144 | 1198 |
dplyr::ungroup() |
1145 | 1199 |
|
1146 | 1200 |
futile.logger::flog.info(paste0(" Calculating intersection for TF ", TFCur, " finished. Number of overlapping TFBS after filtering: ", nrow(final.df))) |
... | ... |
@@ -1226,7 +1280,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1226 | 1280 |
# Reorder to make sure the order is the same. Due to the duplication ID issue, the number of columns may increase after the column selection |
1227 | 1281 |
|
1228 | 1282 |
# Some columns may be removed here due to zero standard deviation |
1229 |
- HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(ENSEMBL %in% colnames(sort.cor.m)) |
|
1283 |
+ HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(.data$ENSEMBL %in% colnames(sort.cor.m)) |
|
1230 | 1284 |
|
1231 | 1285 |
sort.cor.m = sort.cor.m[,as.character(HOCOMOCO_mapping.exp.filt$ENSEMBL)] |
1232 | 1286 |
colnames(sort.cor.m) = as.character(HOCOMOCO_mapping.exp.filt$HOCOID) |
... | ... |
@@ -1239,8 +1293,8 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1239 | 1293 |
|
1240 | 1294 |
peak_TF_overlapCur.df = .asMatrixFromSparse(GRN@data$TFs$TF_peak_overlap, convertZero_to_NA = FALSE) %>% |
1241 | 1295 |
tibble::as_tibble() %>% |
1242 |
- dplyr::filter(!isFiltered) %>% |
|
1243 |
- dplyr::select(-isFiltered) |
|
1296 |
+ dplyr::filter(!.data$isFiltered) %>% |
|
1297 |
+ dplyr::select(-.data$isFiltered) |
|
1244 | 1298 |
|
1245 | 1299 |
if (perm > 0 & shuffle) { |
1246 | 1300 |
peak_TF_overlapCur.df = .shuffleRowsPerColumn(peak_TF_overlapCur.df) |
... | ... |
@@ -1257,18 +1311,22 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1257 | 1311 |
|
1258 | 1312 |
#' Add TF activity data to GRN object using a simplified procedure for estimating it. EXPERIMENTAL. |
1259 | 1313 |
#' |
1314 |
+#' We do not yet provide full support for this function. It is currently being tested. Use at our own risk. |
|
1315 |
+#' |
|
1260 | 1316 |
#' @template GRN |
1261 | 1317 |
#' @template normalization_TFActivity |
1262 | 1318 |
#' @template name_TFActivity |
1263 | 1319 |
#' @template forceRerun |
1264 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
1320 |
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function |
|
1321 |
+#' (\code{GRN@data$TFs[[name]]} in particular, with \code{name} referring to the value of tje \code{name} parameter) |
|
1265 | 1322 |
|
1266 | 1323 |
addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_activity", forceRerun = FALSE) { |
1267 | 1324 |
|
1325 |
+ checkmate::assertClass(GRN, "GRN") |
|
1268 | 1326 |
GRN = .addFunctionLogToObject(GRN) |
1269 | 1327 |
start = Sys.time() |
1270 | 1328 |
|
1271 |
- checkmate::assertClass(GRN, "GRN") |
|
1329 |
+ |
|
1272 | 1330 |
checkmate::assertChoice(normalization, c("cyclicLoess", "sizeFactors", "quantile", "none")) |
1273 | 1331 |
checkmate::assertCharacter(name, min.chars = 1, len = 1) |
1274 | 1332 |
checkmate::assertFlag(forceRerun) |
... | ... |
@@ -1365,6 +1423,11 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac |
1365 | 1423 |
if (normalization == "cyclicLoess") { |
1366 | 1424 |
|
1367 | 1425 |
futile.logger::flog.info(paste0(" Normalizing data using cyclic LOESS")) |
1426 |
+ |
|
1427 |
+ |
|
1428 |
+ packageMessage = paste0("The package csaw is not installed but required for the cyclic LOESS normalization. Please install it and re-run this function or change the normalization type (if possible).") |
|
1429 |
+ .checkPackageInstallation("csaw", packageMessage) |
|
1430 |
+ |
|
1368 | 1431 |
# Perform a cyclic loess normalization |
1369 | 1432 |
# We use a slighlty more complicated setup to derive size factors for library normalization |
1370 | 1433 |
# Instead of just determining the size factors in DeSeq2 via cirtual samples, we use |
... | ... |
@@ -1413,6 +1476,8 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac |
1413 | 1476 |
|
1414 | 1477 |
#' Import externally derived TF Activity data. EXPERIMENTAL. |
1415 | 1478 |
#' |
1479 |
+#' We do not yet provide full support for this function. It is currently being tested. Use at our own risk. |
|
1480 |
+#' |
|
1416 | 1481 |
#' @template GRN |
1417 | 1482 |
#' @param data Data frame. No default. Data with TF data. |
1418 | 1483 |
#' @template name_TFActivity |
... | ... |
@@ -1420,18 +1485,19 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac |
1420 | 1485 |
#' @param nameColumn Character. Default \code{TF.name}. Must be unique for each TF / row. |
1421 | 1486 |
#' @template normalization_TFActivity |
1422 | 1487 |
#' @template forceRerun |
1423 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
1488 |
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function. |
|
1424 | 1489 |
importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF.name", normalization = "none", forceRerun = FALSE) { |
1425 | 1490 |
|
1491 |
+ checkmate::assertClass(GRN, "GRN") |
|
1426 | 1492 |
GRN = .addFunctionLogToObject(GRN) |
1427 | 1493 |
|
1428 | 1494 |
start = Sys.time() |
1429 | 1495 |
|
1430 |
- checkmate::assertClass(GRN, "GRN") |
|
1496 |
+ |
|
1431 | 1497 |
checkmate::assertDataFrame(data, min.cols = 2, min.rows = 1) |
1432 |
- checkmate::assertSubset(idColumn, colnames(data)) |
|
1433 |
- checkmate::assertSubset(nameColumn, colnames(data)) |
|
1434 |
- checkmate::assertSubset(GRN@config$sharedSamples, colnames(data)) |
|
1498 |
+ checkmate::assertChoice(idColumn, colnames(data)) |
|
1499 |
+ checkmate::assertChoice(nameColumn, colnames(data)) |
|
1500 |
+ checkmate::assertSubset(GRN@config$sharedSamples, colnames(data), empty.ok = FALSE) |
|
1435 | 1501 |
checkmate::assertCharacter(name, min.chars = 1, any.missing = FALSE, len = 1) |
1436 | 1502 |
checkmate::assertChoice(normalization, c("cyclicLoess", "sizeFactors", "quantile", "none")) |
1437 | 1503 |
|
... | ... |
@@ -1482,7 +1548,7 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF |
1482 | 1548 |
countsNorm$ENSEMBL = data$ENSEMBL |
1483 | 1549 |
|
1484 | 1550 |
nRowBefore = nrow(countsNorm) |
1485 |
- countsNorm.subset = dplyr::filter(countsNorm, ENSEMBL %in% GRN@data$TFs$translationTable$ENSEMBL) |
|
1551 |
+ countsNorm.subset = dplyr::filter(countsNorm, .data$ENSEMBL %in% GRN@data$TFs$translationTable$ENSEMBL) |
|
1486 | 1552 |
nRowAfter = nrow(countsNorm.subset) |
1487 | 1553 |
if (nRowAfter == 0) { |
1488 | 1554 |
message = "No rows overlapping with translation table, check ENSEMBL IDs." |
... | ... |
@@ -1535,14 +1601,14 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF |
1535 | 1601 |
#' Run the activator-repressor classification for the TFs for a \code{\linkS4class{GRN}} object |
1536 | 1602 |
#' |
1537 | 1603 |
#' @template GRN |
1538 |
-#' @param significanceThreshold_Wilcoxon Numeric between 0 and 1. Default 0.05. Significance threshold for Wilcoxon test that is run in the end for the final classification. See the Vignette and *diffTF* paper for details. |
|
1539 |
-#' @param plot_minNoTFBS_heatmap Integer. Default 100. Minimum number of TFBS for a TF to be included in the heatmap that is part of the output of this function. |
|
1604 |
+#' @param significanceThreshold_Wilcoxon Numeric[0,1]. Default 0.05. Significance threshold for Wilcoxon test that is run in the end for the final classification. See the Vignette and *diffTF* paper for details. |
|
1605 |
+#' @param plot_minNoTFBS_heatmap Integer[1,]. Default 100. Minimum number of TFBS for a TF to be included in the heatmap that is part of the output of this function. |
|
1540 | 1606 |
#' @param deleteIntermediateData \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Should intermediate data be deleted before returning the object after a successful run? Due to the size of the produced intermediate data, we recommend setting this to \code{TRUE}, but if memory or object size are not an issue, the information can also be kept. |
1541 | 1607 |
#' @template plotDiagnosticPlots |
1542 | 1608 |
#' @template outputFolder |
1543 | 1609 |
#' @template corMethod |
1544 | 1610 |
#' @template forceRerun |
1545 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
1611 |
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. |
|
1546 | 1612 |
#' @examples |
1547 | 1613 |
#' # See the Workflow vignette on the GRaNIE website for examples |
1548 | 1614 |
#' # GRN = loadExampleObject() |
... | ... |
@@ -1554,9 +1620,12 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1554 | 1620 |
corMethod = "pearson", |
1555 | 1621 |
forceRerun = FALSE) { |
1556 | 1622 |
|
1623 |
+ start = Sys.time() |
|
1624 |
+ |
|
1625 |
+ checkmate::assertClass(GRN, "GRN") |
|
1557 | 1626 |
GRN = .addFunctionLogToObject(GRN) |
1558 | 1627 |
|
1559 |
- checkmate::assertClass(GRN, "GRN") |
|
1628 |
+ |
|
1560 | 1629 |
checkmate::assertNumber(significanceThreshold_Wilcoxon, lower = 0, upper = 1) |
1561 | 1630 |
checkmate::assertNumber(plot_minNoTFBS_heatmap, lower = 1) |
1562 | 1631 |
checkmate::assertFlag(deleteIntermediateData) |
... | ... |
@@ -1730,6 +1799,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1730 | 1799 |
|
1731 | 1800 |
} # end of for each connection type |
1732 | 1801 |
|
1802 |
+ .printExecutionTime(start, prefix = "") |
|
1733 | 1803 |
|
1734 | 1804 |
GRN |
1735 | 1805 |
|
... | ... |
@@ -1753,6 +1823,8 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1753 | 1823 |
|
1754 | 1824 |
#' Add TF-peak connections to a \code{\linkS4class{GRN}} object |
1755 | 1825 |
#' |
1826 |
+#' After the execution of this function, QC plots can be plotted with the function \code{\link{plotDiagnosticPlots_TFPeaks}} unless this has already been done by default due to \code{plotDiagnosticPlots = TRUE} |
|
1827 |
+#' |
|
1756 | 1828 |
#' @template GRN |
1757 | 1829 |
#' @template plotDiagnosticPlots |
1758 | 1830 |
#' @template plotDetails |
... | ... |
@@ -1760,13 +1832,14 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1760 | 1832 |
#' @template corMethod |
1761 | 1833 |
#' @param connectionTypes Character vector. Default \code{expression}. Vector of connection types to include for the TF-peak connections. If an additional connection type is specified here, it has to be available already within the object (EXPERIMENTAL). See the function \code{\link{addData_TFActivity}} for details. |
1762 | 1834 |
#' @param removeNegativeCorrelation Vector of \code{TRUE} or \code{FALSE}. Default \code{FALSE}. EXPERIMENTAL. Must be a logical vector of the same length as the parameter \code{connectionType}. Should negatively correlated TF-peak connections be removed for the specific connection type? For connection type expression, the default is \code{FALSE}, while for any TF Activity related connection type, we recommend setting this to \code{TRUE}. |
1763 |
-#' @param maxFDRToStore Numeric. Default 0.3. Maximum TF-peak FDR value to permanently store a particular TF-peak connection in the object? This parameter has a large influence on the overall memory size of the object, and we recommend not storing connections with a high FDR due to their sheer number. |
|
1835 |
+#' @param maxFDRToStore Numeric[0,1]. Default 0.3. Maximum TF-peak FDR value to permanently store a particular TF-peak connection in the object? This parameter has a large influence on the overall memory size of the object, and we recommend not storing connections with a high FDR due to their sheer number. |
|
1764 | 1836 |
#' @param addForPermuted \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Add connections also for permuted data. Leave at \code{TRUE} unless you know what you are doing. |
1765 | 1837 |
#' @param useGCCorrection \code{TRUE} or \code{FALSE}. Default \code{FALSE}. EXPERIMENTAL. Should a GC-matched background be used when calculating FDRs? |
1766 |
-#' @param percBackground_size Numeric (0 to 100). Default 75. EXPERIMENTAL. Description will follow. Only relevant if \code{useGCCorrection} is set to \code{TRUE}, ignored otherwise. |
|
1838 |
+#' @param percBackground_size Numeric[0,100]. Default 75. EXPERIMENTAL. Description will follow. Only relevant if \code{useGCCorrection} is set to \code{TRUE}, ignored otherwise. |
|
1767 | 1839 |
#' @param percBackground_resample \code{TRUE} or \code{FALSE}. Default \code{TRUE}. EXPERIMENTAL. Should resampling be enabled for those GC bins for which not enough background peaks are available?. Only relevant if \code{useGCCorrection} is set to \code{TRUE}, ignored otherwise. |
1768 | 1840 |
#' @template forceRerun |
1769 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
1841 |
+#' @seealso \code{\link{plotDiagnosticPlots_TFPeaks}} |
|
1842 |
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. |
|
1770 | 1843 |
#' @examples |
1771 | 1844 |
#' # See the Workflow vignette on the GRaNIE website for examples |
1772 | 1845 |
#' GRN = loadExampleObject() |
... | ... |
@@ -1781,16 +1854,18 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
1781 | 1854 |
useGCCorrection = FALSE, percBackground_size = 75, percBackground_resample = TRUE, |
1782 | 1855 |
forceRerun = FALSE) { |
1783 | 1856 |
|
1784 |
- GRN = .addFunctionLogToObject(GRN) |
|
1785 |
- |
|
1857 |
+ start = Sys.time() |
|
1858 |
+ |
|
1786 | 1859 |
checkmate::assertClass(GRN, "GRN") |
1860 |
+ GRN = .addFunctionLogToObject(GRN) |
|
1861 |
+ |
|
1787 | 1862 |
checkmate::assertFlag(plotDiagnosticPlots) |
1788 | 1863 |
checkmate::assertFlag(plotDetails) |
1789 | 1864 |
checkmate::assertChoice(corMethod, c("pearson", "spearman")) |
1790 | 1865 |
checkmate::assertFlag(addForPermuted) |
1791 | 1866 |
|
1792 | 1867 |
GRN = .checkAndUpdateConnectionTypes(GRN) # For compatibility with older versions |
1793 |
- checkmate::assertSubset(connectionTypes, GRN@config$TF_peak_connectionTypes) |
|
1868 |
+ checkmate::assertSubset(connectionTypes, GRN@config$TF_peak_connectionTypes, empty.ok = FALSE) |
|
1794 | 1869 |
|
1795 | 1870 |
checkmate::assertLogical(removeNegativeCorrelation, any.missing = FALSE, len = length(connectionTypes)) |
1796 | 1871 |
|
... | ... |
@@ -1852,6 +1927,8 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
1852 | 1927 |
|
1853 | 1928 |
} |
1854 | 1929 |
|
1930 |
+ .printExecutionTime(start, prefix = "") |
|
1931 |
+ |
|
1855 | 1932 |
GRN |
1856 | 1933 |
} |
1857 | 1934 |
|
... | ... |
@@ -1928,7 +2005,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
1928 | 2005 |
|
1929 | 2006 |
pb <- progress::progress_bar$new(total = length(allTF)) |
1930 | 2007 |
|
1931 |
- peaksFiltered = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered) |
|
2008 |
+ peaksFiltered = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered) |
|
1932 | 2009 |
|
1933 | 2010 |
if (!useGCCorrection) { |
1934 | 2011 |
minPerc = 100 |
... | ... |
@@ -2036,7 +2113,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2036 | 2113 |
peakIDsSel = c() |
2037 | 2114 |
for (i in seq_len(nrow(GC_classes_foreground.df))) { |
2038 | 2115 |
|
2039 |
- peaksBackgroundGCCur = peaksBackground %>% dplyr::filter(GC_class == GC_classes_foreground.df$GC_class[i]) |
|
2116 |
+ peaksBackgroundGCCur = peaksBackground %>% dplyr::filter(.data$GC_class == GC_classes_foreground.df$GC_class[i]) |
|
2040 | 2117 |
|
2041 | 2118 |
if (nrow( peaksBackgroundGCCur) == 0) { |
2042 | 2119 |
next |
... | ... |
@@ -2180,7 +2257,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2180 | 2257 |
if (connectionTypeCur %in% connectionTypes_removeNegCor ) { |
2181 | 2258 |
|
2182 | 2259 |
# futile.logger::flog.info(paste0(" Remove negatively correlated TF-peak pairs for connection type ", connectionTypeCur)) |
2183 |
- cor.peak.tf = dplyr::filter(cor.peak.tf, TF_peak.r >= 0) |
|
2260 |
+ cor.peak.tf = dplyr::filter(cor.peak.tf, .data$TF_peak.r >= 0) |
|
2184 | 2261 |
|
2185 | 2262 |
} |
2186 | 2263 |
|
... | ... |
@@ -2218,10 +2295,10 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2218 | 2295 |
# DISCARD other rows altogether |
2219 | 2296 |
# Left join here is what we want, as we need this df only for "real" data |
2220 | 2297 |
tblFilt.df = dplyr::left_join(cor.peak.tf, fdr.curve, by = "TF_peak.r_bin") %>% |
2221 |
- dplyr::filter(TF_peak.fdr <= maxFDRToStore) %>% |
|
2222 |
- dplyr::select(TF.name, TF_peak.r_bin, TF_peak.r, TF_peak.fdr, |
|
2223 |
- TF_peak.fdr_orig, peak.ID, TF_peak.fdr_direction, |
|
2224 |
- TF_peak.connectionType, tidyselect::contains("value")) |
|
2298 |
+ dplyr::filter(.data$TF_peak.fdr <= maxFDRToStore) %>% |
|
2299 |
+ dplyr::select(.data$TF.name, .data$TF_peak.r_bin, .data$TF_peak.r, .data$TF_peak.fdr, |
|
2300 |
+ .data$TF_peak.fdr_orig, .data$peak.ID, .data$TF_peak.fdr_direction, |
|
2301 |
+ .data$TF_peak.connectionType, tidyselect::contains("value")) |
|
2225 | 2302 |
|
2226 | 2303 |
|
2227 | 2304 |
if (!plotDetails) { |
... | ... |
@@ -2277,7 +2354,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2277 | 2354 |
requiredNoPeaks = round(n_rel * targetNoPeaks, 0) |
2278 | 2355 |
# Check in background |
2279 | 2356 |
availableNoPeaks = GC_classes_background.df %>% |
2280 |
- dplyr::filter(GC_class == GC_class.cur) %>% |
|
2357 |
+ dplyr::filter(.data$GC_class == GC_class.cur) %>% |
|
2281 | 2358 |
dplyr::pull(.data$n) |
2282 | 2359 |
#futile.logger::flog.info(paste0(" GC.class ", GC.class.cur, ": Required: ", requiredNoPeaks, ", available: ", availableNoPeaks)) |
2283 | 2360 |
if ( availableNoPeaks < requiredNoPeaks) { |
... | ... |
@@ -2309,6 +2386,8 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2309 | 2386 |
|
2310 | 2387 |
#' Add peak-gene connections to a \code{\linkS4class{GRN}} object |
2311 | 2388 |
#' |
2389 |
+#' After the execution of this function, QC plots can be plotted with the function \code{\link{plotDiagnosticPlots_peakGene}} unless this has already been done by default due to \code{plotDiagnosticPlots = TRUE} |
|
2390 |
+#' |
|
2312 | 2391 |
#' @export |
2313 | 2392 |
#' @template GRN |
2314 | 2393 |
#' @param overlapTypeGene Character. \code{"TSS"} or \code{"full"}. Default \code{"TSS"}. If set to \code{"TSS"}, only the TSS of the gene is used as reference for finding genes in the neighborhood of a peak. If set to \code{"full"}, the whole annotated gene (including all exons and introns) is used instead. |
... | ... |
@@ -2317,11 +2396,12 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2317 | 2396 |
#' @param TADs Data frame with TAD domains. Default \code{NULL}. If provided, the neighborhood of a peak is defined by the TAD domain the peak is in rather than a fixed-sized neighborhood. The expected format is a BED-like data frame with at least 3 columns in this particular order: chromosome, start, end, the 4th column is optional and will be taken as ID column. All additional columns as well as column names are ignored. For the first 3 columns, the type is checked as part of a data integrity check. |
2318 | 2397 |
#' @template nCores |
2319 | 2398 |
#' @template plotDiagnosticPlots |
2320 |
-#' @param plotGeneTypes List of character vectors. Default \code{list(c("all"), c("protein_coding"), c("protein_coding", "lincRNA"))}. Each list element may consist of one or multiple gene types that are plotted collectively in one PDF. The special keyword \code{"all"} denotes all gene types that are found (be aware: this typically contains 20+ gene types, see \url{https://www.gencodegenes.org/pages/biotypes.html} for details). |
|
2399 |
+#' @param plotGeneTypes List of character vectors. Default \code{list(c("all"), c("protein_coding"))}. Each list element may consist of one or multiple gene types that are plotted collectively in one PDF. The special keyword \code{"all"} denotes all gene types that are found (be aware: this typically contains 20+ gene types, see \url{https://www.gencodegenes.org/pages/biotypes.html} for details). |
|
2321 | 2400 |
#' @template outputFolder |
2322 | 2401 |
#' @template addRobustRegression |
2323 | 2402 |
#' @template forceRerun |
2324 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function in different flavors. |
|
2403 |
+#' @seealso \code{\link{plotDiagnosticPlots_peakGene}} |
|
2404 |
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. |
|
2325 | 2405 |
#' @examples |
2326 | 2406 |
#' # See the Workflow vignette on the GRaNIE website for examples |
2327 | 2407 |
#' GRN = loadExampleObject() |
... | ... |
@@ -2330,25 +2410,36 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2330 | 2410 |
promoterRange = 250000, TADs = NULL, |
2331 | 2411 |
nCores = 4, |
2332 | 2412 |
plotDiagnosticPlots = TRUE, |
2333 |
- plotGeneTypes = list(c("all"), c("protein_coding"), c("protein_coding", "lincRNA")), |
|
2413 |
+ plotGeneTypes = list(c("all"), c("protein_coding")), |
|
2334 | 2414 |
outputFolder = NULL, |
2335 | 2415 |
addRobustRegression = FALSE, |
2336 | 2416 |
forceRerun = FALSE) { |
2337 | 2417 |
|
2418 |
+ start = Sys.time() |
|
2419 |
+ |
|
2420 |
+ checkmate::assertClass(GRN, "GRN") |
|
2338 | 2421 |
GRN = .addFunctionLogToObject(GRN) |
2339 | 2422 |
|
2340 |
- checkmate::assertClass(GRN, "GRN") |
|
2423 |
+ |
|
2341 | 2424 |
checkmate::assertChoice(overlapTypeGene, c("TSS", "full")) |
2342 | 2425 |
checkmate::assertChoice(corMethod, c("pearson", "spearman")) |
2343 | 2426 |
checkmate::assertIntegerish(promoterRange, lower = 0) |
2344 | 2427 |
checkmate::assert(checkmate::testNull(TADs), checkmate::testDataFrame(TADs)) |
2345 | 2428 |
checkmate::assertIntegerish(nCores, lower = 1) |
2346 | 2429 |
checkmate::assertFlag(plotDiagnosticPlots) |
2347 |
- checkmate::assertList(plotGeneTypes, any.missing = FALSE, min.len = 1, types = "character") |
|
2430 |
+ for (elemCur in plotGeneTypes) { |
|
2431 |
+ checkmate::assertSubset(elemCur, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
|
2432 |
+ } |
|
2433 |
+ |
|
2348 | 2434 |
checkmate::assert(checkmate::testNull(outputFolder), checkmate::testDirectoryExists(outputFolder)) |
2349 | 2435 |
checkmate::assertFlag(addRobustRegression) |
2350 | 2436 |
checkmate::assertFlag(forceRerun) |
2351 | 2437 |
|
2438 |
+ if (addRobustRegression) { |
|
2439 |
+ packageMessage = paste0("The package robust is not installed, but needed here due to addRobustRegression = TRUE. Please install it and re-run this function or change addRobustRegression to FALSE.") |
|
2440 |
+ .checkPackageInstallation("robust", packageMessage) |
|
2441 |
+ } |
|
2442 |
+ |
|
2352 | 2443 |
# As this is independent of the underlying GRN, it has to be done only once |
2353 | 2444 |
|
2354 | 2445 |
if (is.null(GRN@connections$peak_genes[["0"]]) | forceRerun) { |
... | ... |
@@ -2362,7 +2453,8 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2362 | 2453 |
|
2363 | 2454 |
# Check which gene types are available for the particular genome annotation |
2364 | 2455 |
# Use all of them to collect statistics. Filtering can be done later |
2365 |
- gene.types = unique(GRN@annotation$genes$gene.type) |
|
2456 |
+ # Just remove NA |
|
2457 |
+ gene.types = unique(GRN@annotation$genes$gene.type) %>% stats::na.omit() |
|
2366 | 2458 |
|
2367 | 2459 |
|
2368 | 2460 |
for (permutationCur in 0:.getMaxPermutation(GRN)) { |
... | ... |
@@ -2381,7 +2473,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2381 | 2473 |
.calculatePeakGeneCorrelations(GRN, permutationCur, |
2382 | 2474 |
TADs = TADs, |
2383 | 2475 |
neighborhoodSize = promoterRange, |
2384 |
- gene.types = names(gene.types), |
|
2476 |
+ gene.types = as.character(gene.types), |
|
2385 | 2477 |
corMethod = corMethod, |
2386 | 2478 |
randomizePeakGeneConnections = randomizePeakGeneConnections, |
2387 | 2479 |
overlapTypeGene, |
... | ... |
@@ -2400,10 +2492,10 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2400 | 2492 |
|
2401 | 2493 |
} |
2402 | 2494 |
|
2403 |
- GRN |
|
2404 |
- |
|
2405 |
- |
|
2495 |
+ .printExecutionTime(start, prefix = "") |
|
2406 | 2496 |
|
2497 |
+ GRN |
|
2498 |
+ |
|
2407 | 2499 |
} |
2408 | 2500 |
|
2409 | 2501 |
.calculatePeakGeneOverlaps <- function(GRN, allPeaks, peak_TAD_mapping = NULL, par.l, neighborhoodSize = 250000, genomeAssembly, |
... | ... |
@@ -2470,7 +2562,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2470 | 2562 |
subject = .getKnownGeneAnnotationNew(GRN, gene.types = gene.types, extendRegions = NULL) |
2471 | 2563 |
|
2472 | 2564 |
requiredColnames = c("gene.ENSEMBL","gene.type", "gene.name") |
2473 |
- checkmate::assertSubset(requiredColnames, colnames(subject)) |
|
2565 |
+ checkmate::assertSubset(requiredColnames, colnames(subject), empty.ok = FALSE) |
|
2474 | 2566 |
|
2475 | 2567 |
subject.gr = GenomicRanges::makeGRangesFromDataFrame(subject, keep.extra.columns = TRUE) |
2476 | 2568 |
|
... | ... |
@@ -2517,7 +2609,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2517 | 2609 |
TRUE ~ pmin(abs(orig_peak_start - gene.start), abs(orig_peak_end - gene.start)))) |
2518 | 2610 |
|
2519 | 2611 |
if (removeEnsemblNA) { |
2520 |
- overlaps.sub.df = dplyr::filter(overlaps.sub.df, !is.na(gene.ENSEMBL)) |
|
2612 |
+ overlaps.sub.df = dplyr::filter(overlaps.sub.df, !is.na(.data$gene.ENSEMBL)) |
|
2521 | 2613 |
} |
2522 | 2614 |
|
2523 | 2615 |
.printExecutionTime(start) |
... | ... |
@@ -2532,7 +2624,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2532 | 2624 |
TADs = NULL, |
2533 | 2625 |
mergeOverlappingTADs = FALSE, |
2534 | 2626 |
neighborhoodSize = 250000, |
2535 |
- gene.types = c("protein_coding", "lincRNA"), |
|
2627 |
+ gene.types = c("protein_coding"), |
|
2536 | 2628 |
overlapTypeGene = "TSS", |
2537 | 2629 |
corMethod = "pearson", |
2538 | 2630 |
randomizePeakGeneConnections = FALSE, |
... | ... |
@@ -2545,7 +2637,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2545 | 2637 |
|
2546 | 2638 |
genomeAssembly = GRN@config$parameters$genomeAssembly |
2547 | 2639 |
|
2548 |
- checkmate::assertSubset(gene.types, c("all", names(.getAllGeneTypesAndFrequencies(GRN@config$parameters$genomeAssembly, verbose = FALSE)))) |
|
2640 |
+ # checkmate::assertSubset(gene.types, c("all", names(.getAllGeneTypesAndFrequencies(GRN@config$parameters$genomeAssembly, verbose = FALSE))), empty.ok = FALSE) |
|
2549 | 2641 |
|
2550 | 2642 |
if (is.null(nCores)) { |
2551 | 2643 |
nCores = 1 |
... | ... |
@@ -2554,7 +2646,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2554 | 2646 |
|
2555 | 2647 |
#consensusPeaks = .createDF_fromCoordinates(getCounts(GRN, type = "peaks", norm = TRUE, 0)$peakID, "peakID") |
2556 | 2648 |
|
2557 |
- consensusPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered) |
|
2649 |
+ consensusPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered) |
|
2558 | 2650 |
# Preprocess TAD boundaries |
2559 | 2651 |
if (!is.null(TADs)) { |
2560 | 2652 |
|
... | ... |
@@ -2786,6 +2878,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2786 | 2878 |
res.m[rowCur, "r"] = res$estimate |
2787 | 2879 |
|
2788 | 2880 |
if (addRobustRegression) { |
2881 |
+ |
|
2789 | 2882 |
lmRob.sum = tryCatch( { |
2790 | 2883 |
summary(robust::lmRob(data1 ~data2)) |
2791 | 2884 |
|
... | ... |
@@ -2838,18 +2931,29 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2838 | 2931 |
|
2839 | 2932 |
|
2840 | 2933 |
|
2841 |
-#' Filter the GRN and integrate peak-gene connections. |
|
2934 |
+#' Filter TF-peaks and peak-gene connections and combine them to TF-peak-gene connections to construct an eGRN. |
|
2842 | 2935 |
#' |
2843 |
-#' This is one of the main integrative functions of the \code{GRN} package. It has two main functions: Filtering the TF-peak and peak-gene connections that have been identified before, and combining the 3 major elements (TFs, peaks, genes) into one data frame, with one row per connection. Here, a connection can either be a TF-peak, peak-gene or TF-peak-gene link, depending on the parameters. |
|
2844 |
-#' Internally, first, the TF-peak are filtered before the peak-gene connections are added for reasons of memory and computational efficacy: It takes a lot of time and particularly space to connect the full GRN with all peak-gene connections - as most of the links have weak support (i.e., high FDR), first filtering out unwanted links dramatically reduces the memory needed for the combined GRN |
|
2936 |
+#' This is one of the main integrative functions of the \code{GRaNIE} package. It has two main functions: |
|
2937 |
+#' First, filtering both TF-peak and peak-gene connections according to different criteria such as FDR and other properties |
|
2938 |
+#' Second, joining the three major elements that an eGRN consist of (TFs, peaks, genes) into one data frame, with one row per unique TF-peak-gene connection. |
|
2939 |
+#' \strong{Note that the eGRN graph is reset upon successful execution of this function, and re-running the function \code{\link{build_eGRN_graph}} |
|
2940 |
+#' is necessary when any of the network functions of the package shall be executed.} |
|
2941 |
+ |
|
2942 |
+#' Internally, before joining them, both TF-peak links and peak-gene connections are filtered separately for reasons of memory and computational efficacy: |
|
2943 |
+#' First filtering out unwanted links dramatically reduces the memory needed for the full eGRN. Peak-gene p-value adjustment is only done after all filtering steps on the remaining set of |
|
2944 |
+#' connections to lower the statistical burden of multiple-testing adjustment; therefore, this may lead to initially counter-intuitive effects such as a particular connections not being included anymore as compared to a |
|
2945 |
+#' filtering based on different thresholds, or the FDR being different for the same reason. |
|
2845 | 2946 |
#' @template GRN |
2846 | 2947 |
#' @param TF_peak.fdr.threshold Numeric[0,1]. Default 0.2. Maximum FDR for the TF-peak links. Set to 1 or NULL to disable this filter. |
2847 | 2948 |
#' @template TF_peak.connectionTypes |
2848 | 2949 |
#' @param peak_gene.p_raw.threshold Numeric[0,1]. Default NULL. Threshold for the peak-gene connections, based on the raw p-value. All peak-gene connections with a larger raw p-value will be filtered out. |
2849 | 2950 |
#' @param peak_gene.fdr.threshold Numeric[0,1]. Default 0.2. Threshold for the peak-gene connections, based on the FDR. All peak-gene connections with a larger FDR will be filtered out. |
2850 |
-#' @param peak_gene.fdr.method Character. Default "BH". One of: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW". Method for adjusting p-values for multiple testing. If set to "IHW", independent hypothesis weighting will be performed, and a suitable covariate has to be specified for the parameter \code{peak_gene.IHW.covariate}. |
|
2951 |
+#' @param peak_gene.fdr.method Character. Default "BH". One of: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW". |
|
2952 |
+#' Method for adjusting p-values for multiple testing. |
|
2953 |
+#' If set to "IHW", the package \code{IHW} is required (as it is listed under \code{Suggests}, it may not be installed), |
|
2954 |
+#' and independent hypothesis weighting will be performed, and a suitable covariate has to be specified for the parameter \code{peak_gene.IHW.covariate}. |
|
2851 | 2955 |
#' @param peak_gene.IHW.covariate Character. Default NULL. Name of the covariate to use for IHW (column name from the table thatis returned with the function \code{getGRNConnections}. Only relevant if \code{peak_gene.fdr.method} is set to "IHW". You have to make sure the specified covariate is suitable or IHW, see the diagnostic plots that are generated in this function for this. For many datasets, the peak-gene distance (called \code{peak_gene.distance} in the object) seems suitable. |
2852 |
-#' @param peak_gene.IHW.nbins Integer or "auto". Default 5. Number of bins for IHW. Only relevant if \code{peak_gene.fdr.method} is set to "IHW". |
|
2956 |
+#' @param peak_gene.IHW.nbins Integer or "auto". Default "auto". Number of bins for IHW. Only relevant if \code{peak_gene.fdr.method} is set to "IHW". |
|
2853 | 2957 |
#' @template gene.types |
2854 | 2958 |
#' @param allowMissingTFs \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should connections be returned for which the TF is NA (i.e., connections consisting only of peak-gene links?). If set to \code{TRUE}, this generally greatly increases the number of connections but it may not be what you aim for. |
2855 | 2959 |
#' @param allowMissingGenes \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Should connections be returned for which the gene is NA (i.e., connections consisting only of TF-peak links?). If set to \code{TRUE}, this generally increases the number of connections. |
... | ... |
@@ -2863,7 +2967,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2863 | 2967 |
#' @param silent \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Print progress messages and filter statistics. |
2864 | 2968 |
#' @param filterLoops \code{TRUE} or \code{FALSE}. Default \code{TRUE}. If a TF regulates itself (i.e., the TF and the gene are the same entity), should such loops be filtered from the GRN? |
2865 | 2969 |
#' @template outputFolder |
2866 |
-#' @return The same \code{\linkS4class{GRN}} object, with the filtered and merged TF-peak and peak-gene connections in the slot connections$all.filtered. |
|
2970 |
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. The filtered and merged TF-peak and peak-gene connections in the slot \code{GRN@connections$all.filtered}. |
|
2867 | 2971 |
#' @examples |
2868 | 2972 |
#' # See the Workflow vignette on the GRaNIE website for examples |
2869 | 2973 |
#' GRN = loadExampleObject() |
... | ... |
@@ -2871,6 +2975,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2871 | 2975 |
#' @seealso \code{\link{visualizeGRN}} |
2872 | 2976 |
#' @seealso \code{\link{addConnections_TF_peak}} |
2873 | 2977 |
#' @seealso \code{\link{addConnections_peak_gene}} |
2978 |
+#' @seealso \code{\link{build_eGRN_graph}} |
|
2874 | 2979 |
#' @importFrom rlang .data |
2875 | 2980 |
#' @importFrom magrittr `%>%` |
2876 | 2981 |
#' @export |
... | ... |
@@ -2881,8 +2986,8 @@ filterGRNAndConnectGenes <- function(GRN, |
2881 | 2986 |
peak_gene.fdr.threshold= 0.2, |
2882 | 2987 |
peak_gene.fdr.method = "BH", |
2883 | 2988 |
peak_gene.IHW.covariate = NULL, |
2884 |
- peak_gene.IHW.nbins = 5, |
|
2885 |
- gene.types = c("protein_coding", "lincRNA"), |
|
2989 |
+ peak_gene.IHW.nbins = "auto", |
|
2990 |
+ gene.types = c("protein_coding"), |
|
2886 | 2991 |
allowMissingTFs = FALSE, allowMissingGenes = TRUE, |
2887 | 2992 |
peak_gene.r_range = c(0,1), |
2888 | 2993 |
peak_gene.selection = "all", |
... | ... |
@@ -2893,28 +2998,36 @@ filterGRNAndConnectGenes <- function(GRN, |
2893 | 2998 |
outputFolder = NULL, |
2894 | 2999 |
silent = FALSE) { |
2895 | 3000 |
|
2896 |
- |
|
3001 |
+ start = Sys.time() |
|
3002 |
+ |
|
3003 |
+ checkmate::assertClass(GRN, "GRN") |
|
2897 | 3004 |
GRN = .addFunctionLogToObject(GRN) |
2898 | 3005 |
|
2899 |
- checkmate::assertClass(GRN, "GRN") |
|
3006 |
+ |
|
2900 | 3007 |
checkmate::assertCharacter(TF_peak.connectionTypes, min.len = 1, any.missing = FALSE) |
2901 | 3008 |
checkmate::assert(checkmate::checkNull(peak_gene.p_raw.threshold), checkmate::checkNumber(peak_gene.p_raw.threshold, lower = 0, upper = 1)) |
2902 | 3009 |
checkmate::assertNumeric(peak_gene.r_range, lower = -1, upper = 1, len = 2) |
2903 |
- checkmate::assertCharacter(gene.types, min.len = 1) |
|
3010 |
+ checkmate::assertSubset(gene.types, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
|
2904 | 3011 |
checkmate::assertNumber(TF_peak.fdr.threshold, lower = 0, upper = 1) |
2905 | 3012 |
checkmate::assert(checkmate::checkNull(peak_gene.fdr.threshold), checkmate::checkNumber(peak_gene.fdr.threshold, lower = 0, upper = 1)) |
2906 | 3013 |
|
2907 |
- checkmate::assertSubset(peak_gene.fdr.method, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW")) |
|
3014 |
+ checkmate::assertChoice(peak_gene.fdr.method, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW")) |
|
2908 | 3015 |
checkmate::assert(checkmate::checkNull(peak_gene.IHW.covariate), checkmate::checkCharacter(peak_gene.IHW.covariate, min.chars = 1, len = 1)) |
3016 |
+ |
|
3017 |
+ checkmate::assert(checkIntegerish(peak_gene.IHW.nbins, lower = 1), checkSubset(peak_gene.IHW.nbins, "auto")) |
|
3018 |
+ |
|
2909 | 3019 |
checkmate::assertFlag(silent) |
2910 | 3020 |
checkmate::assertChoice(peak_gene.selection, c("all", "closest")) |
2911 | 3021 |
checkmate::assertFlag(allowMissingTFs) |
2912 | 3022 |
checkmate::assertFlag(allowMissingGenes) |
3023 |
+ |
|
3024 |
+ checkmate::assertFlag(TF_peak_FDR_selectViaCorBins) |
|
2913 | 3025 |
checkmate::assertFlag(filterLoops) |
2914 | 3026 |
|
2915 | 3027 |
if (peak_gene.fdr.method == "IHW" & !is.installed("IHW")) { |
2916 |
- message = "IHW has been selected for p-value adjustment, but IHW is currently not installed. Please install it and re-run the function or choose a different method." |
|
2917 |
- .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
3028 |
+ packageMessage = "IHW has been selected for p-value adjustment, but IHW is currently not installed. Please install it and re-run the function or choose a different method." |
|
3029 |
+ .checkPackageInstallation("IHW", packageMessage) |
|
3030 |
+ |
|
2918 | 3031 |
} |
2919 | 3032 |
|
2920 | 3033 |
if (!is.null(peak_gene.p_raw.threshold) & !is.null(peak_gene.fdr.threshold)) { |
... | ... |
@@ -2922,7 +3035,6 @@ filterGRNAndConnectGenes <- function(GRN, |
2922 | 3035 |
.checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
2923 | 3036 |
} |
2924 | 3037 |
|
2925 |
- start = Sys.time() |
|
2926 | 3038 |
if (silent) { |
2927 | 3039 |
futile.logger::flog.threshold(futile.logger::WARN) |
2928 | 3040 |
} |
... | ... |
@@ -2964,7 +3076,7 @@ filterGRNAndConnectGenes <- function(GRN, |
2964 | 3076 |
futile.logger::flog.info(paste0("Inital number of rows left before all filtering steps: ", nrow(grn.filt))) |
2965 | 3077 |
|
2966 | 3078 |
if (! "all" %in% TF_peak.connectionTypes) { |
2967 |
- checkmate::assertSubset(TF_peak.connectionTypes, .getAll_TF_peak_connectionTypes(GRN)) |
|
3079 |
+ checkmate::assertSubset(TF_peak.connectionTypes, .getAll_TF_peak_connectionTypes(GRN), empty.ok = FALSE) |
|
2968 | 3080 |
futile.logger::flog.info(paste0(" Filter network and retain only rows with one of the following TF-peak connection types: ", paste0(TF_peak.connectionTypes, collapse = ", "))) |
2969 | 3081 |
futile.logger::flog.info(paste0(" Number of TF-peak rows before filtering connection types: ", nrow(grn.filt))) |
2970 | 3082 |
grn.filt = dplyr::filter(grn.filt, .data$TF_peak.connectionType %in% TF_peak.connectionTypes) |
... | ... |
@@ -3169,7 +3281,7 @@ filterGRNAndConnectGenes <- function(GRN, |
3169 | 3281 |
|
3170 | 3282 |
if (peak_gene.fdr.method == "IHW") { |
3171 | 3283 |
|
3172 |
- # Identify those entries for which both p-value and covairate are not NA |
|
3284 |
+ # Identify those entries for which both p-value and covariate are not NA |
|
3173 | 3285 |
|
3174 | 3286 |
covariate_val = grn.filt %>% dplyr::pull(!!(peak_gene.IHW.covariate)) |
3175 | 3287 |
indexes = which(!is.na(grn.filt$peak_gene.p_raw) & !is.na(covariate_val)) |
... | ... |
@@ -3267,14 +3379,22 @@ filterGRNAndConnectGenes <- function(GRN, |
3267 | 3379 |
.checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
3268 | 3380 |
} |
3269 | 3381 |
|
3270 |
- .printExecutionTime(start) |
|
3382 |
+ |
|
3271 | 3383 |
|
3272 | 3384 |
|
3273 | 3385 |
} #end for all permutations |
3274 | 3386 |
|
3387 |
+ if (!is.null(GRN@graph)) { |
|
3388 |
+ futile.logger::flog.info(paste0("Reset the graph slot in the object. Rerun the method build_eGRN_graph.")) |
|
3389 |
+ GRN@graph = list() |
|
3390 |
+ } |
|
3391 |
+ |
|
3392 |
+ |
|
3275 | 3393 |
|
3276 | 3394 |
if (silent) futile.logger::flog.threshold(futile.logger::INFO) |
3277 | 3395 |
|
3396 |
+ .printExecutionTime(start) |
|
3397 |
+ |
|
3278 | 3398 |
GRN |
3279 | 3399 |
|
3280 | 3400 |
} |
... | ... |
@@ -3294,7 +3414,7 @@ filterGRNAndConnectGenes <- function(GRN, |
3294 | 3414 |
checkmate::assert(checkmate::checkNumeric(covariates), checkmate::checkFactor(covariates)) |
3295 | 3415 |
stopifnot(length(pvalues) == length(covariates)) |
3296 | 3416 |
checkmate::assertNumber(alpha, lower = 0, upper = 1) |
3297 |
- checkmate::assertSubset(covariate_type, c("ordinal", "nominal")) |
|
3417 |
+ checkmate::assertChoice(covariate_type, c("ordinal", "nominal")) |
|
3298 | 3418 |
checkmate::assert(checkmate::checkInt(nbins, lower = 1), checkmate::checkSubset(nbins, c("auto"))) |
3299 | 3419 |
checkmate::assert(checkmate::checkNull(m_groups), checkmate::checkInteger(m_groups, len = length(covariates))) |
3300 | 3420 |
checkmate::assertLogical(quiet) |
... | ... |
@@ -3303,9 +3423,9 @@ filterGRNAndConnectGenes <- function(GRN, |
3303 | 3423 |
checkmate::assertInt(nsplits_internal, lower = 1) |
3304 | 3424 |
checkmate::assert(checkmate::checkNumeric(lambdas), checkmate::checkSubset(lambdas, c("auto"))) |
3305 | 3425 |
checkmate::assert(checkmate::checkNull(seed), checkmate::checkInteger(seed)) |
3306 |
- checkmate::assertSubset(distrib_estimator, c("grenander", "ECDF")) |
|
3307 |
- checkmate::assertSubset(lp_solver, c("lpsymphony", "gurobi")) |
|
3308 |
- checkmate::assertSubset(adjustment_type, c("BH", "bonferroni")) |
|
3426 |
+ checkmate::assertChoice(distrib_estimator, c("grenander", "ECDF")) |
|
3427 |
+ checkmate::assertChoice(lp_solver, c("lpsymphony", "gurobi")) |
|
3428 |
+ checkmate::assertChoice(adjustment_type, c("BH", "bonferroni")) |
|
3309 | 3429 |
checkmate::assertLogical(return_internal) |
3310 | 3430 |
checkmate::assertLogical(doDiagnostics) |
3311 | 3431 |
checkmate::assert(checkmate::checkNull(pdfFile), checkmate::checkDirectoryExists(dirname(pdfFile), access = "w")) |
... | ... |
@@ -3448,14 +3568,26 @@ filterGRNAndConnectGenes <- function(GRN, |
3448 | 3568 |
#' @template addRobustRegression |
3449 | 3569 |
#' @template nCores |
3450 | 3570 |
#' @template forceRerun |
3451 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
3571 |
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. |
|
3452 | 3572 |
#' @examples |
3453 | 3573 |
#' # See the Workflow vignette on the GRaNIE website for examples |
3454 | 3574 |
#' GRN = loadExampleObject() |
3455 | 3575 |
#' GRN = add_TF_gene_correlation(GRN, forceRerun = FALSE) |
3456 | 3576 |
add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegression = FALSE, nCores = 1, forceRerun = FALSE) { |
3457 | 3577 |
|
3578 |
+ start = Sys.time() |
|
3579 |
+ |
|
3580 |
+ checkmate::assertClass(GRN, "GRN") |
|
3458 | 3581 |
GRN = .addFunctionLogToObject(GRN) |
3582 |
+ checkmate::assertChoice(corMethod, c("pearson", "spearman")) |
|
3583 |
+ assertFlag(addRobustRegression) |
|
3584 |
+ checkmate::assertIntegerish(nCores, lower = 1) |
|
3585 |
+ assertFlag(forceRerun) |
|
3586 |
+ |
|
3587 |
+ if (addRobustRegression) { |
|
3588 |
+ packageMessage = paste0("The package robust is not installed, but needed here due to addRobustRegression = TRUE. Please install it and re-run this function or change addRobustRegression to FALSE.") |
|
3589 |
+ .checkPackageInstallation("robust", packageMessage) |
|
3590 |
+ } |
|
3459 | 3591 |
|
3460 | 3592 |
if (is.null(GRN@connections$TF_genes.filtered) | forceRerun) { |
3461 | 3593 |
|
... | ... |
@@ -3562,6 +3694,8 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress |
3562 | 3694 |
} # end for each permutation |
3563 | 3695 |
} |
3564 | 3696 |
|
3697 |
+ .printExecutionTime(start, prefix = "") |
|
3698 |
+ |
|
3565 | 3699 |
GRN |
3566 | 3700 |
} |
3567 | 3701 |
|
... | ... |
@@ -3580,8 +3714,8 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe |
3580 | 3714 |
|
3581 | 3715 |
futile.logger::flog.info(paste0(" Checking validity of input")) |
3582 | 3716 |
checkmate::assertDataFrame(SNPData, min.rows = 1) |
3583 |
- checkmate::assertSubset(c(col_chr, col_pos), colnames(SNPData)) |
|
3584 |
- checkmate::assertSubset(c(col_peakID), colnames(grn)) |
|
3717 |
+ checkmate::assertSubset(c(col_chr, col_pos), colnames(SNPData), empty.ok = FALSE) |
|
3718 |
+ checkmate::assertSubset(c(col_peakID), colnames(grn), empty.ok = FALSE) |
|
3585 | 3719 |
|
3586 | 3720 |
if (genomeAssembly_peaks != genomeAssembly_SNP) { |
3587 | 3721 |
|
... | ... |
@@ -3683,22 +3817,24 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe |
3683 | 3817 |
####### STATS ######### |
3684 | 3818 |
|
3685 | 3819 |
|
3686 |
-#' Generate a summary PDF for the number of connections for a \code{\linkS4class{GRN}} object. |
|
3820 |
+#' Generate a summary for the number of connections for different filtering criteria for a \code{\linkS4class{GRN}} object. |
|
3687 | 3821 |
#' |
3688 |
-#' Essentially, this functions calls \code{\link{filterGRNAndConnectGenes}} repeatedly and stores the total number of connections and other statistics each time to summarize them afterwards. All arguments are identical to the ones in \code{\link{filterGRNAndConnectGenes}}, see the help for this function for details. |
|
3822 |
+#' This functions calls \code{\link{filterGRNAndConnectGenes}} repeatedly and stores the total number of connections and other statistics each time to summarize them afterwards. All arguments are identical to the ones in \code{\link{filterGRNAndConnectGenes}}, see the help for this function for details. |
|
3823 |
+#' The function \code{\link{plot_stats_connectionSummary}} can be used afterwards for plotting. |
|
3689 | 3824 |
#' |
3690 | 3825 |
#' @export |
3691 | 3826 |
#' @template GRN |
3692 |
-#' @param TF_peak.fdr Numeric vector. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. TF-peak FDR values to iterate over. |
|
3827 |
+#' @param TF_peak.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. TF-peak FDR values to iterate over. |
|
3693 | 3828 |
#' @template TF_peak.connectionTypes |
3694 |
-#' @param peak_gene.fdr Numeric vector. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. Peak-gene FDR values to iterate over. |
|
3695 |
-#' @param peak_gene.p_raw Numeric vector. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL. |
|
3696 |
-#' @param peak_gene.r_range Numeric vector of length 2 (minimum -1, maximum 1). Default \code{c(0,1)}. The correlation range of peak-gene connections to keep. |
|
3829 |
+#' @param peak_gene.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. Peak-gene FDR values to iterate over. |
|
3830 |
+#' @param peak_gene.p_raw Numeric vector[0,1]. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL. |
|
3831 |
+#' @param peak_gene.r_range Numeric vector of length 2[-1,1]. Default \code{c(0,1)}. The correlation range of peak-gene connections to keep. |
|
3697 | 3832 |
#' @template gene.types |
3698 |
-#' @param allowMissingGenes Logical vector. Default \code{c(FALSE, TRUE)}. Allow genes to be missing for peak-gene connections? |
|
3699 |
-#' @param allowMissingTFs Logical vector. Default \code{c(FALSE)}. Allow TFs to be missing for TF-peak connections? |
|
3833 |
+#' @param allowMissingGenes Logical vector of length 1 or 2. Default \code{c(FALSE, TRUE)}. Allow genes to be missing for peak-gene connections? If both \code{FALSE} and \code{TRUE} are given, the code loops over both |
|
3834 |
+#' @param allowMissingTFs Logical vector of length 1 or 2. Default \code{c(FALSE)}. Allow TFs to be missing for TF-peak connections? If both \code{FALSE} and \code{TRUE} are given, the code loops over both |
|
3700 | 3835 |
#' @template forceRerun |
3701 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
3836 |
+#' @seealso \code{\link{plot_stats_connectionSummary}} |
|
3837 |
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. |
|
3702 | 3838 |
#' @examples |
3703 | 3839 |
#' # See the Workflow vignette on the GRaNIE website for examples |
3704 | 3840 |
#' GRN = loadExampleObject() |
... | ... |
@@ -3710,21 +3846,23 @@ generateStatsSummary <- function(GRN, |
3710 | 3846 |
peak_gene.fdr = c(0.001, 0.01, 0.05, 0.1, 0.2), |
3711 | 3847 |
peak_gene.p_raw = NULL, |
3712 | 3848 |
peak_gene.r_range = c(0,1), |
3713 |
- gene.types = c("protein_coding", "lincRNA"), |
|
3849 |
+ gene.types = c("protein_coding"), |
|
3714 | 3850 |
allowMissingGenes = c(FALSE, TRUE), |
3715 | 3851 |
allowMissingTFs = c(FALSE), |
3716 | 3852 |
forceRerun = FALSE |
3717 | 3853 |
) { |
3718 | 3854 |
|
3719 |
- GRN = .addFunctionLogToObject(GRN) |
|
3855 |
+ start = Sys.time() |
|
3720 | 3856 |
|
3721 | 3857 |
checkmate::assertClass(GRN, "GRN") |
3858 |
+ GRN = .addFunctionLogToObject(GRN) |
|
3859 |
+ |
|
3722 | 3860 |
checkmate::assertNumeric(TF_peak.fdr, lower = 0, upper = 1, min.len = 1) |
3723 |
- checkmate::assertSubset(TF_peak.connectionTypes, c("all", GRN@config$TF_peak_connectionTypes)) |
|
3861 |
+ checkmate::assertSubset(TF_peak.connectionTypes, c("all", GRN@config$TF_peak_connectionTypes), empty.ok = FALSE) |
|
3724 | 3862 |
checkmate::assertNumeric(peak_gene.fdr, lower = 0, upper = 1, min.len = 1) |
3725 | 3863 |
checkmate::assert(checkmate::checkNull(peak_gene.p_raw), checkmate::checkNumeric(peak_gene.p_raw, lower = 0, upper = 1, min.len = 1)) |
3726 | 3864 |
checkmate::assertNumeric(peak_gene.r_range, lower = -1, upper = 1, len = 2) |
3727 |
- checkmate::assertCharacter(gene.types, min.len = 1) |
|
3865 |
+ checkmate::assertSubset(gene.types, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
|
3728 | 3866 |
checkmate::assertSubset(allowMissingGenes, c(TRUE, FALSE)) |
3729 | 3867 |
checkmate::assertSubset(allowMissingTFs, c(TRUE, FALSE)) |
3730 | 3868 |
|
... | ... |
@@ -3859,7 +3997,7 @@ generateStatsSummary <- function(GRN, |
3859 | 3997 |
futile.logger::flog.info(paste0("Data already exists in object. Set forceRerun = TRUE to regenerate and overwrite.")) |
3860 | 3998 |
} |
3861 | 3999 |
|
3862 |
- |
|
4000 |
+ .printExecutionTime(start, prefix = "") |
|
3863 | 4001 |
|
3864 | 4002 |
GRN |
3865 | 4003 |
|
... | ... |
@@ -3975,36 +4113,53 @@ generateStatsSummary <- function(GRN, |
3975 | 4113 |
|
3976 | 4114 |
#' Load example GRN dataset |
3977 | 4115 |
#' |
4116 |
+#' Loads an example GRN object with 6 TFs, ~61.000 peaks, ~19.000 genes, 259 filtered connections and pre-calculated enrichments from the internet. |
|
4117 |
+#' This function uses \code{BiocFileCache} if installed to cache the example object, which is |
|
4118 |
+#' considerably faster than re-downloading the file anew every time the function is executed. |
|
4119 |
+#' If not, the file is re-downloaded every time anew. Thus, to enable caching, you may install the package \code{BiocFileCache}. |
|
4120 |
+#' |
|
3978 | 4121 |
#' @export |
3979 | 4122 |
#' @param forceDownload \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should the download be enforced even if the local cached file is already present? |
3980 | 4123 |
#' @param fileURL Character. Default \url{https://www.embl.de/download/zaugg/GRaNIE/GRN.rds}. URL to the GRN example object in rds format. |
3981 | 4124 |
#' @examples |
3982 | 4125 |
#' GRN = loadExampleObject() |
3983 |
-#' @return |
|
3984 |
-#' An example \code{\linkS4class{GRN}} object |
|
3985 |
-#' @import BiocFileCache |
|
4126 |
+#' @return An small example \code{\linkS4class{GRN}} object |
|
3986 | 4127 |
loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl.de/download/zaugg/GRaNIE/GRN.rds") { |
3987 | 4128 |
|
3988 | 4129 |
checkmate::assertFlag(forceDownload) |
3989 |
- # Taken and modified from https://www.bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html |
|
3990 |
- |
|
3991 |
- bfc <- .get_cache() |
|
3992 |
- |
|
3993 |
- rid <- BiocFileCache::bfcquery(bfc, "GRaNIE_object_example")$rid |
|
3994 |
- if (!length(rid)) { |
|
3995 |
- rid <- names(BiocFileCache::bfcadd(bfc, "GRaNIE_object_example", fileURL)) |
|
3996 |
- } |
|
3997 |
- if (!isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)) | forceDownload) { |
|
3998 |
- messageStr = paste0("Downloading GRaNIE example object from ", fileURL) |
|
3999 |
- message(messageStr) |
|
4000 |
- filePath = BiocFileCache::bfcdownload(bfc, rid, ask = FALSE) |
|
4130 |
+ |
|
4131 |
+ if (!is.installed("BiocFileCache")) { |
|
4132 |
+ |
|
4133 |
+ message = paste0("The package BiocFileCache is not installed, but recommended if you want to speed-up the retrieval of the GRN example object ", |
|
4134 |
+ "via this function when using it multiple times. If not installed, the example object has to be downloaded anew every time you use this function.") |
|
4135 |
+ .checkPackageInstallation("BiocFileCache", message, isWarning = TRUE) |
|
4136 |
+ |
|
4137 |
+ GRN = readRDS(url(fileURL)) |
|
4138 |
+ |
|
4139 |
+ } else { |
|
4140 |
+ |
|
4141 |
+ |
|
4142 |
+ # Taken and modified from https://www.bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html |
|
4143 |
+ |
|
4144 |
+ bfc <- .get_cache() |
|
4145 |
+ |
|
4146 |
+ rid <- BiocFileCache::bfcquery(bfc, "GRaNIE_object_example")$rid |
|
4147 |
+ if (!length(rid)) { |
|
4148 |
+ rid <- names(BiocFileCache::bfcadd(bfc, "GRaNIE_object_example", fileURL)) |
|
4149 |
+ } |
|
4150 |
+ if (!isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)) | forceDownload) { |
|
4151 |
+ messageStr = paste0("Downloading GRaNIE example object from ", fileURL) |
|
4152 |
+ message(messageStr) |
|
4153 |
+ filePath = BiocFileCache::bfcdownload(bfc, rid, ask = FALSE) |
|
4154 |
+ } |
|
4155 |
+ |
|
4156 |
+ |
|
4157 |
+ filePath = BiocFileCache::bfcrpath(bfc, rids = rid) |
|
4158 |
+ |
|
4159 |
+ # Now we can read in the locally stored file |
|
4160 |
+ GRN = readRDS(filePath) |
|
4001 | 4161 |
} |
4002 |
- |
|
4003 |
- |
|
4004 |
- filePath = BiocFileCache::bfcrpath(bfc, rids = rid) |
|
4005 |
- |
|
4006 |
- # Now we can read in the locally stored file |
|
4007 |
- GRN = readRDS(filePath) |
|
4162 |
+ |
|
4008 | 4163 |
|
4009 | 4164 |
# Change the default path to the current working directory |
4010 | 4165 |
GRN@config$directories$outputRoot = "." |
... | ... |
@@ -4012,6 +4167,9 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl |
4012 | 4167 |
GRN@config$directories$motifFolder = "." |
4013 | 4168 |
GRN@config$files$output_log = "GRN.log" |
4014 | 4169 |
|
4170 |
+ # Temporary fix: Replace lincRNA with lncRNA due to a recent change in biomaRt until we update the object directly |
|
4171 |
+ GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) |
|
4172 |
+ |
|
4015 | 4173 |
GRN |
4016 | 4174 |
|
4017 | 4175 |
} |
... | ... |
@@ -4020,10 +4178,11 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl |
4020 | 4178 |
#' Get counts for the various data defined in a \code{\linkS4class{GRN}} object |
4021 | 4179 |
#' |
4022 | 4180 |
#' Get counts for the various data defined in a \code{\linkS4class{GRN}} object. |
4181 |
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.} |
|
4023 | 4182 |
#' |
4024 | 4183 |
#' @template GRN |
4025 | 4184 |
#' @param type Character. Either \code{peaks} or \code{rna}. \code{peaks} corresponds to the counts for the open chromatin data, while \code{rna} refers to th RNA-seq counts. If set to \code{rna}, both permuted and non-permuted data can be retrieved, while for \code{peaks}, only the non-permuted one (i.e., 0) can be retrieved. |
4026 |
-#' @param norm Logical. \code{TRUE} or \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned? |
|
4185 |
+#' @param norm Logical. \code{TRUE} or \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned? |
|
4027 | 4186 |
#' @template permuted |
4028 | 4187 |
#' @export |
4029 | 4188 |
#' @import tibble |
... | ... |
@@ -4031,10 +4190,11 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl |
4031 | 4190 |
#' # See the Workflow vignette on the GRaNIE website for examples |
4032 | 4191 |
#' GRN = loadExampleObject() |
4033 | 4192 |
#' counts.df = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE) |
4034 |
-#' @return Data frame of counts, with the type as indicated by the function parameters. |
|
4193 |
+#' @return Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object. |
|
4035 | 4194 |
|
4036 | 4195 |
getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4037 | 4196 |
|
4197 |
+ checkmate::assertClass(GRN, "GRN") |
|
4038 | 4198 |
GRN = .addFunctionLogToObject(GRN) |
4039 | 4199 |
|
4040 | 4200 |
checkmate::assertChoice(type, c("peaks", "rna")) |
... | ... |
@@ -4083,7 +4243,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4083 | 4243 |
} else { # permutation 1 |
4084 | 4244 |
|
4085 | 4245 |
if (type == "peaks") { |
4086 |
- message = "Could not find counts in object." |
|
4246 |
+ message = "Could not find permuted peak counts in GRN object. Peaks are not stored as pemruted, set permuted = FALSE for type = \"peaks\"" |
|
4087 | 4247 |
.checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
4088 | 4248 |
} else { |
4089 | 4249 |
|
... | ... |
@@ -4110,23 +4270,28 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4110 | 4270 |
|
4111 | 4271 |
|
4112 | 4272 |
|
4113 |
-#' Extract connections from a \code{\linkS4class{GRN}} object |
|
4273 |
+#' Extract connections or links from a \code{\linkS4class{GRN}} object as da data frame. |
|
4274 |
+#' |
|
4275 |
+#' Returns stored connections/links (either TF-peak, peak-genes or the filtered set of connections as produced by \code{\link{filterGRNAndConnectGenes}}). |
|
4276 |
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.} |
|
4114 | 4277 |
#' |
4115 | 4278 |
#' @export |
4116 | 4279 |
#' @template GRN |
4117 | 4280 |
#' @template permuted |
4118 |
-#' @param type Character. Default \code{all.filtered}. Must be one of \code{TF_peaks}, \code{peak_genes}, \code{all.filtered}. The type of connections to retrieve. |
|
4119 |
-#' @param include_TF_gene_correlations Logical. \code{TRUE} or \code{FALSE}. Should TFs and gene correlations be returned as well? If set to \code{TRUE}, they must have been computed beforehand with \code{\link{add_TF_gene_correlation}}. |
|
4120 |
-#' @return A data frame with the connections. Importantly, this function does **NOT** return a \code{\linkS4class{GRN}} object. |
|
4281 |
+#' @param type Character. One of \code{TF_peaks}, \code{peak_genes}, \code{all.filtered}. Default \code{all.filtered}. The type of connections to retrieve. |
|
4282 |
+#' @param include_TF_gene_correlations Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should TFs and gene correlations be returned as well? If set to \code{TRUE}, they must have been computed beforehand with \code{\link{add_TF_gene_correlation}}. |
|
4283 |
+#' @return A data frame with the requested connections. This function does **NOT** return a \code{\linkS4class{GRN}} object. |
|
4284 |
+#' @seealso \code{\link{filterGRNAndConnectGenes}} |
|
4121 | 4285 |
#' @examples |
4122 | 4286 |
#' # See the Workflow vignette on the GRaNIE website for examples |
4123 | 4287 |
#' GRN = loadExampleObject() |
4124 | 4288 |
#' GRN_con.all.df = getGRNConnections(GRN) |
4125 | 4289 |
getGRNConnections <- function(GRN, type = "all.filtered", permuted = FALSE, include_TF_gene_correlations = FALSE) { |
4126 | 4290 |
|
4291 |
+ checkmate::assertClass(GRN, "GRN") |
|
4127 | 4292 |
GRN = .addFunctionLogToObject(GRN) |
4128 | 4293 |
|
4129 |
- checkmate::assertClass(GRN, "GRN") |
|
4294 |
+ |
|
4130 | 4295 |
checkmate::assertChoice(type, c("TF_peaks", "peak_genes", "all.filtered")) |
4131 | 4296 |
checkmate::assertFlag(permuted) |
4132 | 4297 |
#checkmate::assertIntegerish(permutation, lower = 0, upper = .getMaxPermutation(GRN)) |
... | ... |
@@ -4304,11 +4469,13 @@ getGRNConnections <- function(GRN, type = "all.filtered", permuted = FALSE, inc |
4304 | 4469 |
|
4305 | 4470 |
#' Retrieve parameters for previously used function calls and general parameters for a \code{\linkS4class{GRN}} object. |
4306 | 4471 |
#' |
4472 |
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.} |
|
4473 |
+#' |
|
4307 | 4474 |
#' @export |
4308 | 4475 |
#' @template GRN |
4309 | 4476 |
#' @param name Character. Default \code{all}. Name of parameter or function name to retrieve. Set to the special keyword \code{all} to retrieve all parameters. |
4310 |
-#' @param type Character. Default \code{parameter}. Either \code{function} or \code{parameter}. When set to \code{function}, a valid \code{GRaNIE} function name must be given that has been run before. When set to \code{parameter}, in combination with \code{name}, returns a specific parameter (as specified in \code{GRN@config})). |
|
4311 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
4477 |
+#' @param type Character. Either \code{function} or \code{parameter}. Default \code{parameter}. When set to \code{function}, a valid \code{GRaNIE} function name must be given that has been run before. When set to \code{parameter}, in combination with \code{name}, returns a specific parameter (as specified in \code{GRN@config})). |
|
4478 |
+#' @return The requested parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object. |
|
4312 | 4479 |
#' @examples |
4313 | 4480 |
#' # See the Workflow vignette on the GRaNIE website for examples |
4314 | 4481 |
#' GRN = loadExampleObject() |
... | ... |
@@ -4316,14 +4483,14 @@ getGRNConnections <- function(GRN, type = "all.filtered", permuted = FALSE, inc |
4316 | 4483 |
getParameters <- function (GRN, type = "parameter", name = "all") { |
4317 | 4484 |
|
4318 | 4485 |
checkmate::assertClass(GRN, "GRN") |
4319 |
- checkmate::assertSubset(type, c("function", "parameter")) |
|
4486 |
+ checkmate::assertChoice(type, c("function", "parameter")) |
|
4320 | 4487 |
|
4321 | 4488 |
if (type == "function") { |
4322 | 4489 |
|
4323 | 4490 |
checkmate::assertCharacter(name, any.missing = FALSE, len = 1) |
4324 | 4491 |
functionParameters = GRN@config$functionParameters[[name]] |
4325 | 4492 |
if (is.null(functionParameters)) { |
4326 |
- checkmate::assertSubset(name, ls(paste0("package:", utils::packageName()))) |
|
4493 |
+ checkmate::assertChoice(name, ls(paste0("package:", utils::packageName()))) |
|
4327 | 4494 |
} |
4328 | 4495 |
|
4329 | 4496 |
return(functionParameters) |
... | ... |
@@ -4335,7 +4502,7 @@ getParameters <- function (GRN, type = "parameter", name = "all") { |
4335 | 4502 |
} else { |
4336 | 4503 |
parameters = GRN@config[[name]] |
4337 | 4504 |
if (is.null(parameters)) { |
4338 |
- checkmate::assertSubset(name, names(GRN@config$parameters)) |
|
4505 |
+ checkmate::assertChoice(name, names(GRN@config$parameters)) |
|
4339 | 4506 |
} |
4340 | 4507 |
|
4341 | 4508 |
return(parameters) |
... | ... |
@@ -4360,17 +4527,16 @@ getParameters <- function (GRN, type = "parameter", name = "all") { |
4360 | 4527 |
#' Optional convenience function to delete intermediate data from the function \code{\link{AR_classification_wrapper}} and summary statistics that may occupy a lot of space |
4361 | 4528 |
#' @export |
4362 | 4529 |
#' @template GRN |
4363 |
-#' @return The same \code{\linkS4class{GRN}} object, with some slots being deleted (\code{GRN@data$TFs$classification} as well as \code{GRN@stats$connectionDetails.l}) |
|
4530 |
+#' @return An updated \code{\linkS4class{GRN}} object, with some slots being deleted (\code{GRN@data$TFs$classification} as well as \code{GRN@stats$connectionDetails.l}) |
|
4364 | 4531 |
#' @examples |
4365 | 4532 |
#' # See the Workflow vignette on the GRaNIE website for examples |
4366 |
-#' # GRN = loadExampleObject() |
|
4367 |
-#' # GRN = deleteIntermediateData(GRN) |
|
4533 |
+#' GRN = loadExampleObject() |
|
4534 |
+#' GRN = deleteIntermediateData(GRN) |
|
4368 | 4535 |
deleteIntermediateData <- function(GRN) { |
4369 | 4536 |
|
4370 |
- |
|
4537 |
+ checkmate::assertClass(GRN, "GRN") |
|
4371 | 4538 |
GRN = .addFunctionLogToObject(GRN) |
4372 | 4539 |
|
4373 |
- checkmate::assertClass(GRN, "GRN") |
|
4374 | 4540 |
|
4375 | 4541 |
for (permutationCur in 0:.getMaxPermutation(GRN)) { |
4376 | 4542 |
|
... | ... |
@@ -4400,7 +4566,9 @@ deleteIntermediateData <- function(GRN) { |
4400 | 4566 |
|
4401 | 4567 |
getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) { |
4402 | 4568 |
|
4569 |
+ checkmate::assertClass(GRN, "GRN") |
|
4403 | 4570 |
GRN = .addFunctionLogToObject(GRN) |
4571 |
+ checkmate::assertFlag(forceRerun) |
|
4404 | 4572 |
|
4405 | 4573 |
# Get base mean expression for genes and TFs and mean accessibility from peaks |
4406 | 4574 |
|
... | ... |
@@ -4438,7 +4606,7 @@ getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) { |
4438 | 4606 |
#' @export |
4439 | 4607 |
#' @template GRN |
4440 | 4608 |
#' @param outputDirectory Character. Default \code{.}. New output directory for all output files unless overwritten via the parameter \code{outputFolder}. |
4441 |
-#' @return The same \code{\linkS4class{GRN}} object, with the output directory being adjusted accordingly |
|
4609 |
+#' @return An updated \code{\linkS4class{GRN}} object, with the output directory being adjusted accordingly |
|
4442 | 4610 |
#' @examples |
4443 | 4611 |
#' GRN = loadExampleObject() |
4444 | 4612 |
#' GRN = changeOutputDirectory(GRN, outputDirectory = ".") |
... | ... |
@@ -124,12 +124,15 @@ |
124 | 124 |
|
125 | 125 |
.printExecutionTime(start) |
126 | 126 |
|
127 |
- counts.norm %>% |
|
127 |
+ counts.norm = counts.norm %>% |
|
128 | 128 |
as.data.frame() %>% |
129 | 129 |
tibble::as_tibble() %>% |
130 | 130 |
dplyr::mutate({{idColumn}} := ids) %>% |
131 |
- dplyr::select({{idColumn}}, tidyselect::everything()) %>% |
|
132 |
- purrr::set_names(c(idColumn, colnames_samples)) |
|
131 |
+ dplyr::select({{idColumn}}, tidyselect::everything()) |
|
132 |
+ |
|
133 |
+ colnames(counts.norm) = c(idColumn, colnames_samples) |
|
134 |
+ |
|
135 |
+ counts.norm |
|
133 | 136 |
|
134 | 137 |
} |
135 | 138 |
|
... | ... |
@@ -63,9 +63,9 @@ |
63 | 63 |
|
64 | 64 |
.startLogger <- function(logfile, level, removeOldLog = TRUE, appenderName = "consoleAndFile", verbose = TRUE) { |
65 | 65 |
|
66 |
- checkmate::assertSubset(level, c("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL")) |
|
66 |
+ checkmate::assertChoice(level, c("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL")) |
|
67 | 67 |
checkmate::assertFlag(removeOldLog) |
68 |
- checkmate::assertSubset(appenderName, c("console", "file", "consoleAndFile")) |
|
68 |
+ checkmate::assertChoice(appenderName, c("console", "file", "consoleAndFile")) |
|
69 | 69 |
checkmate::assertFlag(verbose) |
70 | 70 |
|
71 | 71 |
if (appenderName != "console") { |
... | ... |
@@ -105,13 +105,61 @@ |
105 | 105 |
# PACKAGE LOADING AND DETACHING FUNCTIONS # |
106 | 106 |
########################################### |
107 | 107 |
|
108 |
-.checkOntologyPackageInstallation <- function(pkg) { |
|
109 |
- if (!is.installed(pkg)) { |
|
110 |
- message = paste0("The package ", pkg, " is not installed, which is however needed for the chosen ontology enrichment. Please install it and re-run this function or change the ontology.") |
|
111 |
- .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
108 |
+.checkPackageInstallation <- function(pkg, message, isWarning = FALSE) { |
|
109 |
+ |
|
110 |
+ pkgMissing = c() |
|
111 |
+ for (packageCur in pkg) { |
|
112 |
+ if (!is.installed(packageCur)) { |
|
113 |
+ pkgMissing = c(pkgMissing, packageCur) |
|
114 |
+ } |
|
115 |
+ } |
|
116 |
+ |
|
117 |
+ if (length(pkgMissing) > 0) { |
|
118 |
+ message = paste0(message, "\n\nExecute the following line in R to install the missing package(s): `BiocManager::install(c(\"", |
|
119 |
+ paste0(pkgMissing, collapse = "\",\""), |
|
120 |
+ "\"))`") |
|
121 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = isWarning) |
|
122 |
+ } |
|
123 |
+ |
|
124 |
+ |
|
125 |
+} |
|
126 |
+ |
|
127 |
+ |
|
128 |
+.checkAndLoadPackagesGenomeAssembly <- function(genomeAssembly) { |
|
129 |
+ |
|
130 |
+ baseMessage = paste0("For the chosen genome assembly version, particular genome annotation packages are required but not installed. Please install and re-run this function.") |
|
131 |
+ |
|
132 |
+ if (genomeAssembly == "hg38") { |
|
133 |
+ .checkPackageInstallation(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg38.knownGene", "BSgenome.Hsapiens.UCSC.hg38"), baseMessage) |
|
134 |
+ } else if (genomeAssembly == "hg19") { |
|
135 |
+ .checkPackageInstallation(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg19.knownGene", "BSgenome.Hsapiens.UCSC.hg19"), baseMessage) |
|
136 |
+ } else if (genomeAssembly == "mm10") { |
|
137 |
+ .checkPackageInstallation(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm10.knownGene", "BSgenome.Mmusculus.UCSC.mm10"), baseMessage) |
|
138 |
+ } else if (genomeAssembly == "mm9") { |
|
139 |
+ .checkPackageInstallation(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm9.knownGene", "BSgenome.Mmusculus.UCSC.mm9"), baseMessage) |
|
112 | 140 |
} |
141 |
+ |
|
113 | 142 |
} |
114 | 143 |
|
144 |
+ |
|
145 |
+.checkPackage_topGO_and_arguments <- function (ontology, algorithm, statistic) { |
|
146 |
+ |
|
147 |
+ if (length(intersect(ontology, c("GO_BP", "GO_MF", "GO_CC"))) > 0) { |
|
148 |
+ |
|
149 |
+ packageMessage = paste0("The package topGO is not installed but required when selecting any of the three following ontologies: GO_BP, GO_MF, GO_CC. Please install it and re-run this function or change the ontology.") |
|
150 |
+ .checkPackageInstallation("topGO", packageMessage) |
|
151 |
+ |
|
152 |
+ # This function is calle in the topGO:::.onAttach function and needs to be executed once otherwise |
|
153 |
+ # errors like object 'GOBPTerm' of mode 'environment' was not found are thrown |
|
154 |
+ suppressMessages(topGO::groupGOTerms()) |
|
155 |
+ |
|
156 |
+ checkmate::assertChoice(algorithm , topGO::whichAlgorithms()) |
|
157 |
+ checkmate::assertChoice(statistic , topGO::whichTests()) |
|
158 |
+ } |
|
159 |
+} |
|
160 |
+ |
|
161 |
+ |
|
162 |
+ |
|
115 | 163 |
.clearOpenDevices <- function() { |
116 | 164 |
|
117 | 165 |
while (length(grDevices::dev.list()) > 0) { |
... | ... |
@@ -212,26 +260,11 @@ |
212 | 260 |
|
213 | 261 |
res.l = list() |
214 | 262 |
|
215 |
- # maxCores = tryCatch( |
|
216 |
- # { |
|
217 |
- # out = BiocParallel::multicoreWorkers() / 2 |
|
218 |
- # |
|
219 |
- # }, |
|
220 |
- # error=function() { |
|
221 |
- # |
|
222 |
- # message = "Could not retrieve the available number of cores. There might be a problem with your installation of BiocParallel. Check whether BiocParallel::multicoreWorkers() returns an integer. Try executing the following line to fix the problem if a re-installation of BiocParallel does not work: library(parallel) and then options(mc.cores=4L), with 4 being the maximum number of cores available for the machine you run the pipeline on For now, the function will just run with 1 core." |
|
223 |
- # .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
|
224 |
- # return(1) |
|
225 |
- # } |
|
226 |
- # ) |
|
227 |
- # |
|
228 |
- # |
|
229 |
- # if (nCores > maxCores) { |
|
230 |
- # nCores = max(1, floor(maxCores)) |
|
231 |
- # futile.logger::flog.warn(paste0(" Adjusted nCores down to ", nCores, " (only 50% of the cores are used at max)")) |
|
232 |
- # } |
|
233 |
- |
|
263 |
+ |
|
234 | 264 |
if (nCores > 1) { |
265 |
+ |
|
266 |
+ message = paste0("The package BiocParallel is not installed but required if more than 1 core should be used as requested. Please install it and re-run this function to speed-up the computation time or set the nCores parameter to 1 to disable parallel computation.") |
|
267 |
+ .checkPackageInstallation("BiocParallel", message) |
|
235 | 268 |
|
236 | 269 |
res.l = tryCatch( { |
237 | 270 |
BiocParallel::bplapply(iteration, functionName, ..., BPPARAM = .initBiocParallel(nCores)) |
... | ... |
@@ -381,31 +414,18 @@ |
381 | 414 |
gr |
382 | 415 |
} |
383 | 416 |
|
384 |
-.checkAndLoadPackages <- function(packageList, verbose = FALSE) { |
|
385 |
- |
|
386 |
- for (packageCur in packageList) { |
|
387 |
- if (!is.installed(packageCur)) { |
|
388 |
- message = paste0("The suggested package \"", packageCur, "\" is not yet installed, but it is required for this function and in this context. Please install it and re-run this function.") |
|
389 |
- .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
390 |
- } |
|
391 |
- } |
|
392 |
- |
|
393 |
-} |
|
417 |
+ |
|
394 | 418 |
|
395 | 419 |
.getGenomeObject <- function(genomeAssembly, type = "txbd") { |
396 | 420 |
|
397 |
- |
|
398 |
- checkmate::assertSubset(type, c("txbd", "BSgenome", "packageName")) |
|
399 |
- checkmate::assertSubset(genomeAssembly, c("hg19","hg38", "mm9", "mm10")) |
|
400 |
- |
|
421 |
+ checkmate::assertChoice(type, c("txbd", "BSgenome", "packageName")) |
|
422 |
+ checkmate::assertChoice(genomeAssembly, c("hg19","hg38", "mm9", "mm10")) |
|
401 | 423 |
|
402 | 424 |
if (genomeAssembly == "hg38") { |
403 | 425 |
|
404 | 426 |
if (type == "txbd") { |
405 |
- .checkAndLoadPackages(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg38.knownGene"), verbose = FALSE) |
|
406 | 427 |
obj <- TxDb.Hsapiens.UCSC.hg38.knownGene::TxDb.Hsapiens.UCSC.hg38.knownGene |
407 | 428 |
} else if (type == "BSgenome") { |
408 |
- .checkAndLoadPackages(c("BSgenome.Hsapiens.UCSC.hg38"), verbose = FALSE) |
|
409 | 429 |
obj <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 |
410 | 430 |
} else { |
411 | 431 |
obj = "org.Hs.eg.db" |
... | ... |
@@ -414,10 +434,8 @@ |
414 | 434 |
} else if (genomeAssembly == "hg19") { |
415 | 435 |
|
416 | 436 |
if (type == "txbd") { |
417 |
- .checkAndLoadPackages(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg19.knownGene"), verbose = FALSE) |
|
418 | 437 |
obj <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene |
419 | 438 |
} else if (type == "BSgenome") { |
420 |
- .checkAndLoadPackages(c("BSgenome.Hsapiens.UCSC.hg19"), verbose = FALSE) |
|
421 | 439 |
obj <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19 |
422 | 440 |
} else { |
423 | 441 |
obj = "org.Hs.eg.db" |
... | ... |
@@ -427,10 +445,8 @@ |
427 | 445 |
} else if (genomeAssembly == "mm10") { |
428 | 446 |
|
429 | 447 |
if (type == "txbd") { |
430 |
- .checkAndLoadPackages(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm10.knownGene"), verbose = FALSE) |
|
431 | 448 |
obj <- TxDb.Mmusculus.UCSC.mm10.knownGene::TxDb.Mmusculus.UCSC.mm10.knownGene |
432 | 449 |
} else if (type == "BSgenome") { |
433 |
- .checkAndLoadPackages(c("BSgenome.Mmusculus.UCSC.mm10"), verbose = FALSE) |
|
434 | 450 |
obj <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10 |
435 | 451 |
} else { |
436 | 452 |
obj = "org.Mm.eg.db" |
... | ... |
@@ -439,10 +455,8 @@ |
439 | 455 |
} else if (genomeAssembly == "mm9") { |
440 | 456 |
|
441 | 457 |
if (type == "txbd") { |
442 |
- .checkAndLoadPackages(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm9.knownGene"), verbose = FALSE) |
|
443 | 458 |
obj <- TxDb.Mmusculus.UCSC.mm9.knownGene::TxDb.Mmusculus.UCSC.mm9.knownGene |
444 | 459 |
} else if (type == "BSgenome") { |
445 |
- .checkAndLoadPackages(c("BSgenome.Mmusculus.UCSC.mm9"), verbose = FALSE) |
|
446 | 460 |
obj <- BSgenome.Mmusculus.UCSC.mm9::BSgenome.Mmusculus.UCSC.mm9 |
447 | 461 |
} else { |
448 | 462 |
obj = "org.Mm.eg.db" |
... | ... |
@@ -458,31 +472,6 @@ |
458 | 472 |
GenomeInfoDb::seqlengths(txdb) |
459 | 473 |
} |
460 | 474 |
|
461 |
-.checkAndLoadPackagesGenomeAssembly <- function(genomeAssembly) { |
|
462 |
- |
|
463 |
- checkmate::assertSubset(genomeAssembly, c("hg19","hg38", "mm9", "mm10")) |
|
464 |
- |
|
465 |
- if (genomeAssembly == "hg38") { |
|
466 |
- |
|
467 |
- packages = c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg38.knownGene") |
|
468 |
- |
|
469 |
- } else if (genomeAssembly == "hg19") { |
|
470 |
- |
|
471 |
- packages = c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg19.knownGene") |
|
472 |
- |
|
473 |
- } else if (genomeAssembly == "mm10") { |
|
474 |
- |
|
475 |
- packages = c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm10.knownGene") |
|
476 |
- |
|
477 |
- } else if (genomeAssembly == "mm9") { |
|
478 |
- |
|
479 |
- packages = c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm9.knownGene") |
|
480 |
- |
|
481 |
- } |
|
482 |
- |
|
483 |
- packages |
|
484 |
-} |
|
485 |
- |
|
486 | 475 |
.shuffleColumns <- function(df, nPermutations, returnUnshuffled = TRUE, returnAsList = TRUE, saveMemory = TRUE) { |
487 | 476 |
|
488 | 477 |
start = Sys.time() |
... | ... |
@@ -552,7 +541,7 @@ |
552 | 541 |
|
553 | 542 |
} |
554 | 543 |
|
555 |
- |
|
544 |
+#' @importFrom Matrix Matrix |
|
556 | 545 |
.asSparseMatrix <- function(matrix, convertNA_to_zero=TRUE, dimnames = NULL) { |
557 | 546 |
|
558 | 547 |
if (convertNA_to_zero) { |
... | ... |
@@ -562,7 +551,7 @@ |
562 | 551 |
Matrix::Matrix(matrix, sparse=TRUE, dimnames = dimnames) |
563 | 552 |
} |
564 | 553 |
|
565 |
- |
|
554 |
+#' @importFrom Matrix Matrix |
|
566 | 555 |
.asMatrixFromSparse <-function(matrix, convertZero_to_NA=TRUE) { |
567 | 556 |
|
568 | 557 |
if (methods::is(matrix,"dgeMatrix") | methods::is(matrix,"dgCMatrix") | methods::is(matrix,"dgRMatrix")) { |
... | ... |
@@ -589,7 +578,7 @@ |
589 | 578 |
|
590 | 579 |
.read_tidyverse_wrapper <- function(file, type = "tsv", ncolExpected = NULL, minRows = 0, verbose = TRUE, ...) { |
591 | 580 |
|
592 |
- checkmate::assertSubset(type, c("csv", "csv2", "tsv", "delim")) |
|
581 |
+ checkmate::assertChoice(type, c("csv", "csv2", "tsv", "delim")) |
|
593 | 582 |
|
594 | 583 |
start = Sys.time() |
595 | 584 |
if (verbose) futile.logger::flog.info(paste0("Reading file ", file)) |
... | ... |
@@ -720,11 +709,8 @@ is.installed <- function(mypkg){ |
720 | 709 |
} |
721 | 710 |
} |
722 | 711 |
|
723 |
- |
|
724 | 712 |
.getBiomartParameters <- function(genomeAssembly) { |
725 | 713 |
|
726 |
- .checkOntologyPackageInstallation("biomaRt") |
|
727 |
- |
|
728 | 714 |
if (grepl(x = genomeAssembly, pattern = "^hg\\d\\d")){ |
729 | 715 |
dataset = "hsapiens_gene_ensembl" |
730 | 716 |
if (genomeAssembly == "hg38") { |
... | ... |
@@ -1,5 +1,7 @@ |
1 | 1 |
######## GRN graph functions ######## |
2 | 2 |
#' Builds a graph out of a set of connections |
3 |
+#' |
|
4 |
+#' This function requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}} |
|
3 | 5 |
#' |
4 | 6 |
#' @template GRN |
5 | 7 |
#' @param model_TF_gene_nodes_separately \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should TF and gene nodes be modeled separately? If set to \code{TRUE},this may lead to unwanted effects in case of TF-TF connections (i.e., a TF regulating another TF) |
... | ... |
@@ -12,11 +14,13 @@ |
12 | 14 |
#' # See the Workflow vignette on the GRaNIE website for examples |
13 | 15 |
#' GRN = loadExampleObject() |
14 | 16 |
#' GRN = build_eGRN_graph(GRN, forceRerun = FALSE) |
15 |
-#' @return The same \code{\linkS4class{GRN}} object. |
|
17 |
+#' @return An updated \code{\linkS4class{GRN}} object, with the graph(s) being stored in the slot `graph` (i.e., `GRN@graph` for both TF-gene and TF-peak-gene graphs) |
|
16 | 18 |
build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
17 | 19 |
allowLoops = FALSE, removeMultiple = FALSE, directed = FALSE, forceRerun = FALSE) { |
18 | 20 |
|
21 |
+ start = Sys.time() |
|
19 | 22 |
checkmate::assertClass(GRN, "GRN") |
23 |
+ GRN = .addFunctionLogToObject(GRN) |
|
20 | 24 |
checkmate::assertFlag(model_TF_gene_nodes_separately) |
21 | 25 |
checkmate::assertFlag(allowLoops) |
22 | 26 |
checkmate::assertFlag(removeMultiple) |
... | ... |
@@ -92,6 +96,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
92 | 96 |
|
93 | 97 |
} |
94 | 98 |
|
99 |
+ .printExecutionTime(start) |
|
95 | 100 |
|
96 | 101 |
GRN |
97 | 102 |
|
... | ... |
@@ -189,21 +194,34 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
189 | 194 |
} |
190 | 195 |
|
191 | 196 |
|
192 |
-#' Perform all network-related statistical and descriptive analyses, including community and enrichment analyses. |
|
197 |
+#' Perform all network-related statistical and descriptive analyses, including community and enrichment analyses. See the functions it executes in the @seealso section below. |
|
193 | 198 |
#' |
194 |
-#' A convenience function that calls all network-related functions in one-go, using selected default parameters and a set of adjustable ones also. For full adjustment, run the individual functions separately. |
|
199 |
+#' A convenience function that calls all network-related functions in one-go, using selected default parameters and a set of adjustable ones also. |
|
200 |
+#' For full adjustment, run the individual functions separately. |
|
201 |
+#' This function requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}} |
|
202 |
+ |
|
195 | 203 |
#' |
196 | 204 |
#' @inheritParams calculateGeneralEnrichment |
197 | 205 |
#' @inheritParams plotCommunitiesStats |
198 | 206 |
#' @inheritParams plotCommunitiesEnrichment |
199 | 207 |
#' @inheritParams calculateCommunitiesStats |
200 | 208 |
#' @template maxWidth_nchar_plot |
209 |
+#' @seealso \code{\link{build_eGRN_graph}} |
|
210 |
+#' @seealso \code{\link{plotGeneralGraphStats}} |
|
211 |
+#' @seealso \code{\link{calculateGeneralEnrichment}} |
|
212 |
+#' @seealso \code{\link{plotGeneralEnrichment}} |
|
213 |
+#' @seealso \code{\link{calculateCommunitiesStats}} |
|
214 |
+#' @seealso \code{\link{plotCommunitiesStats}} |
|
215 |
+#' @seealso \code{\link{calculateCommunitiesEnrichment}} |
|
216 |
+#' @seealso \code{\link{plotCommunitiesEnrichment}} |
|
217 |
+#' @seealso \code{\link{calculateTFEnrichment}} |
|
218 |
+#' @seealso \code{\link{plotTFEnrichment}} |
|
201 | 219 |
#' @export |
202 | 220 |
#' @examples |
203 | 221 |
#' # See the Workflow vignette on the GRaNIE website for examples |
204 | 222 |
#' # GRN = loadExampleObject() |
205 | 223 |
#' # GRN = performAllNetworkAnalyses(GRN, outputFolder = ".", forceRerun = FALSE) |
206 |
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. |
|
224 |
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function. |
|
207 | 225 |
performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
208 | 226 |
algorithm = "weight01", statistic = "fisher", |
209 | 227 |
background = "neighborhood", |
... | ... |
@@ -259,7 +277,7 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
259 | 277 |
# Retrieve set of background genes (as vector) used for enrichment analyses from a GRN object |
260 | 278 |
.getBackgroundGenes <-function(GRN, type = "neighborhood") { |
261 | 279 |
|
262 |
- checkmate::assertSubset(type, c("all_annotated", "all_RNA", "neighborhood")) |
|
280 |
+ checkmate::assertChoice(type, c("all_annotated", "all_RNA", "neighborhood"), empty.ok = FALSE) |
|
263 | 281 |
|
264 | 282 |
if (type == "all_annotated") { |
265 | 283 |
|
... | ... |
@@ -286,41 +304,56 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
286 | 304 |
} |
287 | 305 |
|
288 | 306 |
|
289 |
-#' Run an enrichment analysis for the genes in the filtered \code{\linkS4class{GRN}} |
|
307 |
+#' Run an enrichment analysis for the genes in the whole network in the filtered \code{\linkS4class{GRN}} object |
|
308 |
+#' |
|
309 |
+#' The enrichment analysis is based on the whole network, see \code{\link{calculateCommunitiesEnrichment}} and \code{\link{calculateTFEnrichment}} for |
|
310 |
+#' community- and TF-specific enrichment, respectively. |
|
311 |
+#' This function requires the existence of the eGRN graph in the \code{\linkS4class{GRN}} object as produced by \code{\link{build_eGRN_graph}}. |
|
312 |
+#' Results can subsequently be visualized with the function \code{\link{plotGeneralEnrichment}}. |
|
313 |
+#' |
|
314 |
+#' All enrichment functions use the TF-gene graph as defined in the `GRN` object. See the `ontology` argument for currently supported ontologies. |
|
315 |
+#' Also note that some parameter combinations for `algorithm` and `statistic` are incompatible, an error message will be thrown in such a case. |
|
290 | 316 |
#' |
291 |
-#' This function runs an enrichment analysis for the genes in the filtered network. |
|
292 | 317 |
#' @template GRN |
293 |
-#' @param ontology Character vector of ontologies. Default \code{c("GO_BP", "GO_MF")}. Valid values are \code{"GO_BP"}, \code{"GO_MF"}, \code{"GO_CC"}, \code{"KEGG"}, \code{"DO"}, and \code{"Reactome"}, referring to GO Biological Process, GO Molecular Function, GO Cellular Component, KEGG, Disease Ontology, and Reactome Pathways. |
|
318 |
+#' @param ontology Character vector of ontologies. Default \code{c("GO_BP", "GO_MF")}. |
|
319 |
+#' Valid values are \code{"GO_BP"}, \code{"GO_MF"}, \code{"GO_CC"}, \code{"KEGG"}, \code{"DO"}, and \code{"Reactome"}, |
|
320 |
+#' referring to \emph{GO Biological Process}, \emph{GO Molecular Function}, \emph{GO Cellular Component}, \emph{KEGG}, \emph{Disease Ontology}, |
|
321 |
+#' and \emph{Reactome Pathways}, respectively. \code{GO} ontologies require the \code{topGO}, |
|
322 |
+#' \code{"KEGG"} the \code{clusterProfiler}, \code{"DO"} the \code{DOSE}, and \code{"Reactome"} the \code{ReactomePA} packages, respectively. |
|
323 |
+#' As they are listed under \code{Suggests}, they may not yet be installed, and the function will throw an error if they are missing. |
|
294 | 324 |
#' @param algorithm Character. Default \code{"weight01"}. One of: \code{"classic"}, \code{"elim"}, \code{"weight"}, \code{"weight01"}, \code{"lea"}, \code{"parentchild"}. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), ignored otherwise. Name of the algorithm that handles the GO graph structures. Valid inputs are those supported by the \code{topGO} library. |
295 | 325 |
#' @param statistic Character. Default \code{"fisher"}. One of: \code{"fisher"}, \code{"ks"}, \code{"t"}, \code{"globaltest"}, \code{"sum"}, \code{"ks.ties"}. Statistical test to be used. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), and valid inputs are those supported by the topGO library, ignored otherwise. For the other ontologies the test statistic is always Fisher. |
296 | 326 |
#' @param background Character. Default \code{"neighborhood"}. One of: \code{"all_annotated"}, \code{"all_RNA"}, \code{"neighborhood"}. Set of genes to be used to construct the background for the enrichment analysis. This can either be all annotated genes in the reference genome (all_annotated), all differentially expressed genes (all_RNA), or all the genes that are within the neighborhood of a peak in the GRN (neighborhood) |
297 | 327 |
#' @param pAdjustMethod Character. Default \code{"BH"}. One of: \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, \code{"BY"}, \code{"fdr"}. This parameter is only relevant for the following ontologies: KEGG, DO, Reactome. For the other ontologies, the algorithm serves as an adjustment. |
298 | 328 |
#' @template forceRerun |
299 |
-#' @return The same \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$general} slot. |
|
329 |
+#' @return An updated \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$general} slot. |
|
300 | 330 |
#' @seealso \code{\link{plotGeneralEnrichment}} |
301 | 331 |
#' @seealso \code{\link{calculateCommunitiesEnrichment}} |
332 |
+#' @seealso \code{\link{calculateTFEnrichment}} |
|
302 | 333 |
#' @seealso \code{\link{plotCommunitiesEnrichment}} |
303 | 334 |
#' @examples |
304 | 335 |
#' # See the Workflow vignette on the GRaNIE website for examples |
305 | 336 |
#' GRN = loadExampleObject() |
306 | 337 |
#' GRN = calculateGeneralEnrichment(GRN, ontology = "GO_BP", forceRerun = FALSE) |
307 | 338 |
#' @export |