Browse code

Version bump

Christian Arnold authored on 14/09/2022 08:59:35
Showing61 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,6 @@
1
+^.*\.Rproj$
2
+^\.Rproj\.user$
3
+pkgdown/
4
+TFBS_selected/
5
+exampleDataset/
6
+logo.png
... ...
@@ -4,3 +4,14 @@ vignettes/*_files
4 4
 vignettes/*_cache
5 5
 vignettes/output
6 6
 .Rproj.user
7
+docs
8
+exampleDataset
9
+pkgdown
10
+TFBS_selected
11
+TFBS_selected.zip
12
+_pkgdown.yml
13
+.Rhistory
14
+.RData
15
+GRaNIE.Rproj
16
+fixBugs_RELASE_GRaNIE.sh
17
+logo.png
... ...
@@ -1,6 +1,6 @@
1 1
 Package: GRaNIE
2 2
 Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data
3
-Version: 1.1.10
3
+Version: 1.1.12
4 4
 Encoding: UTF-8
5 5
 Authors@R: c(person("Christian", "Arnold", email =
6 6
         "chrarnold@web.de", role = c("cre","aut")),
... ...
@@ -27,28 +27,22 @@ Imports:
27 27
     RColorBrewer,
28 28
     ComplexHeatmap,
29 29
     DESeq2,
30
-    csaw,
31 30
     circlize,
32
-    robust,
33 31
     progress,
34 32
     utils,
35 33
     methods,
36 34
     stringr,
37 35
     scales,
38
-    BiocManager,
39
-    BiocParallel,
40 36
     igraph,
41 37
     S4Vectors,
42 38
     ggplot2,
43 39
     rlang,
44 40
     Biostrings,
45 41
     GenomeInfoDb,
46
-    IRanges,
47 42
     SummarizedExperiment,
48 43
     forcats,
49 44
     gridExtra,
50 45
     limma,
51
-    purrr,
52 46
     tidyselect,
53 47
     readr,
54 48
     grid,
... ...
@@ -60,12 +54,10 @@ Imports:
60 54
     magrittr,
61 55
     tibble,
62 56
     viridis,
63
-    BiocFileCache,
64
-    colorspace
57
+    colorspace,
58
+    biomaRt
65 59
 Depends:
66
-    R (>= 4.2.0),
67
-    tidyverse,
68
-    topGO
60
+    R (>= 4.2.0)
69 61
 Suggests:
70 62
     knitr,
71 63
     BSgenome.Hsapiens.UCSC.hg19,
... ...
@@ -79,13 +71,17 @@ Suggests:
79 71
     org.Hs.eg.db,
80 72
     org.Mm.eg.db,
81 73
     IHW,
82
-    biomaRt,
83 74
     clusterProfiler,
84 75
     ReactomePA,
85 76
     DOSE,
77
+    BiocFileCache,
86 78
     ChIPseeker,
87 79
     testthat (>= 3.0.0),
88
-    BiocStyle
80
+    BiocStyle,
81
+    csaw,
82
+    BiocParallel,
83
+    topGO,
84
+    robust
89 85
 VignetteBuilder: knitr
90 86
 biocViews: Software, GeneExpression, GeneRegulation, NetworkInference, GeneSetEnrichment, BiomedicalInformatics, Genetics, Transcriptomics, ATACSeq, RNASeq, GraphAndNetwork, Regression, Transcription, ChIPSeq
91 87
 License: Artistic-2.0
... ...
@@ -24,6 +24,7 @@ export(initializeGRN)
24 24
 export(loadExampleObject)
25 25
 export(nGenes)
26 26
 export(nPeaks)
27
+export(nTFs)
27 28
 export(overlapPeaksAndTFBS)
28 29
 export(performAllNetworkAnalyses)
29 30
 export(plotCommunitiesEnrichment)
... ...
@@ -36,8 +37,6 @@ export(plotPCA_all)
36 37
 export(plotTFEnrichment)
37 38
 export(plot_stats_connectionSummary)
38 39
 export(visualizeGRN)
39
-import(BiocFileCache)
40
-import(BiocManager)
41 40
 import(GenomicRanges)
42 41
 import(checkmate)
43 42
 import(ggplot2)
... ...
@@ -45,11 +44,11 @@ import(grDevices)
45 44
 import(graphics)
46 45
 import(patchwork)
47 46
 import(tibble)
48
-import(tidyverse)
49
-import(topGO)
50 47
 import(utils)
51 48
 importFrom(GenomicRanges,mcols)
52
-importFrom(IRanges,width)
49
+importFrom(Matrix,Matrix)
50
+importFrom(biomaRt,getBM)
51
+importFrom(biomaRt,useEnsembl)
53 52
 importFrom(circlize,colorRamp2)
54 53
 importFrom(grid,gpar)
55 54
 importFrom(magrittr,`%>%`)
... ...
@@ -1,3 +1,19 @@
1
+# GRaNIE 1.1.12 (2022-09-13)
2
+
3
+## Major changes
4
+
5
+- many Documentation and R help updates, Package Details Vignette is online
6
+- The workflow vignette is now improved: better figure resolution, figure aspect ratios are optimized, and a few other changes
7
+- the eGRN graph structure as built by `build_eGRN_graph()` in the `GRaNIE` object is now reset whenever the function `filterGRNAndConnectGenes()` is successfully executed to make sure that enrichment functions etc are not using an outdated graph structure. 
8
+- the landing page of the website has been extended and overhauled
9
+- removed some dependency packages and moved others into `Suggests` to lower the installation burden of the package. In addition, removed `topGO` from the `Depends` section (now in `Suggests`) and removed `tidyverse` altogether (before in `Depends`). Detailed explanations when and how the packages listed under `Suggests` are needed can now be found in the new [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation).
10
+- new helper function `installSuggestedPackages()` to install all packages listed under `Suggests`
11
+
12
+## Minor changes
13
+
14
+- many small changes in the code, better argument checking, and preparing rigorous unit test inclusion
15
+- internally renaming the (recently changed / renamed) gene type `lncRNA` from `biomaRt` to `lincRNA` to be compatible with older versions of `GRaNIE`
16
+
1 17
 
2 18
 # GRaNIE 1.1.X (2022-05-31)
3 19
 
... ...
@@ -202,7 +202,7 @@ setMethod("show",
202 202
                 cat(" Communities (TF-gene):\n")
203 203
                 df = igraph::vertex.attributes(GRN@graph[["TF_gene"]]$graph)
204 204
                 if (!is.null(df) & "community" %in% names(df)) {
205
-                    communities = df %>% as.data.frame() %>% dplyr::count(community) %>% dplyr::arrange(desc(n))
205
+                    communities = df %>% as.data.frame() %>% dplyr::count(community) %>% dplyr::arrange(dplyr::desc(.data$n))
206 206
                     cat("  Communities, sorted by size (n = Number of nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "")
207 207
                 } else {
208 208
                     cat("  None found\n")
... ...
@@ -241,11 +241,13 @@ setMethod("show",
241 241
 
242 242
 #' Get the number of peaks for a \code{\linkS4class{GRN}} object.
243 243
 #' 
244
-#' Return the number of peaks (all or only non-filtered ones) that are defined in the \code{\linkS4class{GRN}} object.
244
+#' Returns the number of peaks (all or only non-filtered ones) from the provided peak datain the \code{\linkS4class{GRN}} object.
245 245
 #'
246 246
 #' @template GRN
247 247
 #' @param filter TRUE or FALSE. Default TRUE. Should peaks marked as filtered be included in the count?
248 248
 #' @return Integer. Number of peaks that are defined in the \code{\linkS4class{GRN}} object, either by excluding (filter = TRUE) or including (filter = FALSE) peaks that are currently marked as \emph{filtered}.
249
+#' @seealso \code{\link{nTFs}}
250
+#' @seealso \code{\link{nGenes}}
249 251
 #' @examples
250 252
 #' # See the Workflow vignette on the GRaNIE website for examples
251 253
 #' GRN = loadExampleObject()
... ...
@@ -253,7 +255,6 @@ setMethod("show",
253 255
 #' nPeaks(GRN, filter = FALSE)
254 256
 #' @export
255 257
 #' @aliases peaks
256
-#' @rdname peaks-methods
257 258
 nPeaks <- function(GRN, filter = TRUE) {
258 259
   
259 260
   checkmate::assertClass(GRN, "GRN")
... ...
@@ -276,11 +277,13 @@ nPeaks <- function(GRN, filter = TRUE) {
276 277
 
277 278
 #' Get the number of genes for a \code{\linkS4class{GRN}} object.
278 279
 #' 
279
-#' Return the number of genes (all or only non-filtered ones) that are defined in the \code{\linkS4class{GRN}} object.
280
+#' Returns the number of genes (all or only non-filtered ones) from the provided RNA-seq data in the \code{\linkS4class{GRN}} object.
280 281
 #'
281 282
 #' @template GRN
282 283
 #' @param filter TRUE or FALSE. Default TRUE. Should genes marked as filtered be included in the count?
283 284
 #' @return Integer. Number of genes that are defined in the \code{\linkS4class{GRN}} object, either by excluding (filter = TRUE) or including (filter = FALSE) genes that are currently marked as \emph{filtered}.
285
+#' @seealso \code{\link{nTFs}}
286
+#' @seealso \code{\link{nPeaks}}
284 287
 #' @examples
285 288
 #' # See the Workflow vignette on the GRaNIE website for examples
286 289
 #' GRN = loadExampleObject()
... ...
@@ -288,7 +291,6 @@ nPeaks <- function(GRN, filter = TRUE) {
288 291
 #' nGenes(GRN, filter = FALSE)
289 292
 #' @export
290 293
 #' @aliases genes
291
-#' @rdname genes-methods
292 294
 nGenes <- function(GRN, filter = TRUE) {
293 295
   
294 296
   checkmate::assertClass(GRN, "GRN")
... ...
@@ -306,4 +308,32 @@ nGenes <- function(GRN, filter = TRUE) {
306 308
   
307 309
   nGenes
308 310
   
311
+}
312
+
313
+
314
+
315
+#' Get the number of TFs for a \code{\linkS4class{GRN}} object.
316
+#' 
317
+#' Returns the number of TFs  from the provided TFBS data in the \code{\linkS4class{GRN}} object.
318
+#'
319
+#' @template GRN
320
+#' @return Integer. Number of TFs that are defined in the \code{\linkS4class{GRN}} object.
321
+#' @seealso \code{\link{nGenes}}
322
+#' @seealso \code{\link{nPeaks}}
323
+#' @examples
324
+#' # See the Workflow vignette on the GRaNIE website for examples
325
+#' GRN = loadExampleObject()
326
+#' nTFs(GRN)
327
+#' @export
328
+#' @aliases TFs
329
+nTFs <- function(GRN) {
330
+    
331
+    checkmate::assertClass(GRN, "GRN")
332
+    
333
+    if (is.null(GRN@data$TFs$translationTable)) {
334
+        return(NA)
335
+    }
336
+    
337
+    nrow(GRN@data$TFs$translationTable)
338
+    
309 339
 }
310 340
\ No newline at end of file
... ...
@@ -1,24 +1,42 @@
1 1
 ######## Init GRN ########
2 2
 
3
-#' Initialize a \code{\linkS4class{GRN}} object
3
+#' Create and initialize a \code{\linkS4class{GRN}} object.
4
+#' 
5
+#' Executing this function is the very first step in the *GRaNIE* workflow. After its execution, data can be added to the object. 
6
+#' \strong{Depending on the genome assembly version, additional genome annotation packages are required, as follows:} 
7
+#' For \code{hg19} and \code{hg38},
8
+#' the packages \code{org.Hs.eg.db} as well as \code{BSgenome.Hsapiens.UCSC.hg19}+\code{TxDb.Hsapiens.UCSC.hg19.knownGene} or 
9
+#' \code{BSgenome.Hsapiens.UCSC.hg38}+\code{TxDb.Hsapiens.UCSC.hg38.knownGene} are required, respectively. 
10
+#' For \code{mm9} and \code{mm10},
11
+#' the packages \code{org.Mm.eg.db} as well as \code{BSgenome.Mmusculus.UCSC.mm9}+\code{TxDb.Mmusculus.UCSC.mm9.knownGene} or 
12
+#' \code{Mmusculus.UCSC.mm10}+\code{TxDb.Mmusculus.UCSC.mm10.knownGene} are required, respectively.
13
+#' For more information, see the error message if any of these packages is missing or the 
14
+#' \href{https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation}{Package Details Vignette}.
4 15
 #' 
5 16
 #' @export
6
-#' @param objectMetadata List. Default \code{list()}. Optional (named) list with an arbitrary number of elements, all of which capture metadata for the object. This is mainly used to distinguish GRN objects from one another by storing object-specific metadata along with the data.
7
-#' @param outputFolder Output folder, either absolute or relative to the current working directory. No default. Default output folder where all pipeline output will be put unless specified otherwise. We recommend specifying an absolute path. Note that for Windows-based systems, the path must be correctly specified with "/" as path separator.
8
-#' @param genomeAssembly Character. No default. The genome assembly of all data that to be used within this object. Currently, supported genomes are: \code{hg19}, \code{hg38}, and \code{mm10}.
17
+#' @param objectMetadata List. Default \code{list()}. Optional (named) list with an arbitrary number of elements, all of which 
18
+#' capture metadata for the object. This is mainly used to distinguish GRN objects from one another by storing object-specific metadata along with the data.
19
+#' @param outputFolder Output folder, either absolute or relative to the current working directory. Default \code{"."}. 
20
+#' Default output folder where all pipeline output will be put unless specified otherwise. We recommend specifying an absolute path. 
21
+#' Note that for Windows-based systems, the path must be correctly specified with "/" as path separator.
22
+#' @param genomeAssembly Character. No default. The genome assembly of all data that to be used within this object. 
23
+#' Currently, supported genomes are: \code{hg19}, \code{hg38}, \code{mm9} and \code{mm10}. See function description for further information and notes.
9 24
 #' @return Empty \code{\linkS4class{GRN}} object
10 25
 #' @examples 
11 26
 #' meta.l = list(name = "exampleName", date = "01.03.22")
12 27
 #' GRN = initializeGRN(objectMetadata = meta.l, outputFolder = "output", genomeAssembly = "hg38")
13 28
 #' @export
14
-#' @import tidyverse
15 29
 #' @importFrom stats sd median cor cor.test quantile
16 30
 initializeGRN <- function(objectMetadata = list(),
17
-                          outputFolder, 
31
+                          outputFolder = ".", 
18 32
                           genomeAssembly) {
19 33
   
34
+  start = Sys.time()   
35
+    
20 36
   checkmate::assert(checkmate::checkNull(objectMetadata), checkmate::checkList(objectMetadata))
21
-  checkmate::assertSubset(genomeAssembly, c("hg19","hg38", "mm10"))
37
+  checkmate::assertChoice(genomeAssembly, c("hg19","hg38", "mm9", "mm10"))
38
+
39
+  .checkAndLoadPackagesGenomeAssembly(genomeAssembly)
22 40
   
23 41
   # Create the folder first if not yet existing
24 42
   if (!dir.exists(outputFolder)) {
... ...
@@ -51,8 +69,7 @@ initializeGRN <- function(objectMetadata = list(),
51 69
   packageName = utils::packageName()
52 70
   par.l$packageVersion = ifelse(is.null(packageName), NA, paste0(utils::packageVersion(packageName)[[1]], collapse = "."))
53 71
   par.l$genomeAssembly = genomeAssembly
54
-  
55
-  .checkAndLoadPackagesGenomeAssembly(genomeAssembly)
72
+
56 73
   
57 74
   # Make an internal subslot
58 75
   par.l$internal = list()
... ...
@@ -92,29 +109,47 @@ initializeGRN <- function(objectMetadata = list(),
92 109
   
93 110
   futile.logger::flog.info(paste0("Empty GRN object created successfully. Type the object name (e.g., GRN) to retrieve summary information about it at any time."))
94 111
   
112
+  .printExecutionTime(start, prefix = "")
95 113
   
96 114
   GRN
97 115
 }
98 116
 
99 117
 ######## Add and filter data ########
100 118
 
101
-#' Add data to a \code{\linkS4class{GRN}} object
119
+#' Add data to a \code{\linkS4class{GRN}} object.
120
+#' 
121
+#' This function adds both RNA and peak data to a \code{\linkS4class{GRN}} object, along with data normalization.
122
+#' In addition, and highly recommended, sample metadata can be optionally provided.
123
+#' 
124
+#' If the \code{ChIPseeker} package is installed, additional peak annotation is provided in the annotation slot and a peak annotation QC plot is produced as part of peak-gene QC.
125
+#' This is fully optional, however, and has no consequences for downstream functions.
102 126
 #' 
103 127
 #' @export
104 128
 #' @template GRN 
105
-#' @param counts_peaks Data frame. No default. Counts for the peaks, with raw or normalized counts for each peak (rows) across all samples (columns). In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_peaks} below. Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table.
106
-#' @param normalization_peaks Character. Default \code{DESeq_sizeFactor}. Normalization procedure for peak data. Must be one of \code{DESeq_sizeFactor}, \code{none}, or \code{quantile}.
107
-#' @param idColumn_peaks Character. Default \code{peakID}. Name of the column in the counts_peaks data frame that contains peak IDs. The required format must be {chr}:{start}-{end}", with {chr} denoting the abbreviated chromosome name, and {start} and {end} the begin and end of the peak coordinates, respectively. End must be bigger than start. Examples for valid peak IDs are \code{chr1:400-800} or \code{chrX:20-25}.
108
-#' @param counts_rna Data frame. No default. Counts for the RNA-seq data, with raw or normalized counts for each gene (rows) across all samples (columns). In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_rna} below. Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table.
109
-#' @param normalization_rna Character. Default \code{quantile}. Normalization procedure for peak data. Must be one of "DESeq_sizeFactor", "none", or "quantile"
129
+#' @param counts_peaks Data frame. No default. Counts for the peaks, with raw or normalized counts for each peak (rows) across all samples (columns). 
130
+#' In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_peaks} below. 
131
+#' Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table.
132
+#' @param normalization_peaks Character. Default \code{DESeq_sizeFactor}. 
133
+#' Normalization procedure for peak data. Must be one of \code{DESeq_sizeFactor}, \code{none}, or \code{quantile}.
134
+#' @param idColumn_peaks Character. Default \code{peakID}. Name of the column in the counts_peaks data frame that contains peak IDs. 
135
+#' The required format must be {chr}:{start}-{end}", with {chr} denoting the abbreviated chromosome name, and {start} and {end} the begin and end 
136
+#' of the peak coordinates, respectively. End must be bigger than start. Examples for valid peak IDs are \code{chr1:400-800} or \code{chrX:20-25}.
137
+#' @param counts_rna Data frame. No default. Counts for the RNA-seq data, with raw or normalized counts for each gene (rows) across all samples (columns). 
138
+#' In addition to the count data, it must also contain one ID column with a particular format, see the argument \code{idColumn_rna} below. 
139
+#' Row names are ignored, column names must be set to the sample names and must match those from the RNA counts and the sample metadata table.
140
+#' @param normalization_rna Character. Default \code{quantile}. Normalization procedure for peak data. 
141
+#' Must be one of "DESeq_sizeFactor", "none", or "quantile". For "quantile", \code{limma::normalizeQuantiles} is used for normalization.
110 142
 #' @param idColumn_RNA Character. Default \code{ENSEMBL}. Name of the column in the \code{counts_rna} data frame that contains Ensembl IDs.
111
-#' @param sampleMetadata Data frame. Default \code{NULL}. Optional, additional metadata for the samples, such as age, sex, gender etc. If provided, the @seealso [plotPCA_all()] function can then incorporate and plot it. Sample names must match with those from both peak and RNA-Seq data. The first column is expected to contain the sample IDs, the actual column name is irrelevant.
112
-#' @param allowOverlappingPeaks \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should overlapping peaks be allowed (then only a warning is issued when overlapping peaks are found) or (the default) should an error be raised?
143
+#' @param sampleMetadata Data frame. Default \code{NULL}. Optional, additional metadata for the samples, such as age, sex, gender etc. 
144
+#' If provided, the @seealso [plotPCA_all()] function can then incorporate and plot it. Sample names must match with those from both peak and RNA-Seq data. The first column is expected to contain the sample IDs, the actual column name is irrelevant.
145
+#' @param allowOverlappingPeaks \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should overlapping peaks be allowed (then only a warning is issued 
146
+#' when overlapping peaks are found) or (the default) should an error be raised?
113 147
 #' @template forceRerun
114
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
148
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function (e.g., slots \code{GRN@data$peaks} and \code{GRN@data$RNA})
149
+#' @seealso \code{\link{plotPCA_all}}
115 150
 #' @examples 
116 151
 #' # See the Workflow vignette on the GRaNIE website for examples
117
-#' # library(tidyverse)
152
+#' # library(readr)
118 153
 #' # rna.df   = read_tsv("https://www.embl.de/download/zaugg/GRaNIE/rna.tsv.gz")
119 154
 #' # peaks.df = read_tsv("https://www.embl.de/download/zaugg/GRaNIE/peaks.tsv.gz")
120 155
 #' # meta.df  = read_tsv("https://www.embl.de/download/zaugg/GRaNIE/sampleMetadata.tsv.gz")
... ...
@@ -127,15 +162,18 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
127 162
                     allowOverlappingPeaks= FALSE,
128 163
                     forceRerun = FALSE) {
129 164
   
130
-  GRN = .addFunctionLogToObject(GRN)      
165
+  start = Sys.time()
131 166
   
132 167
   checkmate::assertClass(GRN, "GRN")
168
+  GRN = .addFunctionLogToObject(GRN)      
169
+  
133 170
   checkmate::assertDataFrame(counts_peaks, min.rows = 1, min.cols = 2)
134 171
   checkmate::assertDataFrame(counts_rna, min.rows = 1, min.cols = 2)
135 172
   checkmate::assertCharacter(idColumn_peaks, min.chars = 1, len = 1)
136 173
   checkmate::assertCharacter(idColumn_RNA, min.chars = 1, len = 1)
137 174
   checkmate::assertChoice(normalization_peaks, c("none", "DESeq_sizeFactor", "quantile"))
138 175
   checkmate::assertChoice(normalization_rna, c("none", "DESeq_sizeFactor", "quantile"))  
176
+  checkmate::assertFlag(allowOverlappingPeaks)
139 177
   checkmate::assertFlag(forceRerun)
140 178
   
141 179
   if (is.null(GRN@data$peaks$counts_norm) |
... ...
@@ -328,6 +366,8 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
328 366
   # Add gene annotation once
329 367
   GRN = .populateGeneAnnotation(GRN)
330 368
   
369
+  .printExecutionTime(start, prefix = "")
370
+  
331 371
   GRN
332 372
   
333 373
 }
... ...
@@ -369,10 +409,10 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
369 409
   paste0(ids_chr, ":", format(as.integer(start), scientific = FALSE), "-", format(as.integer(end), scientific= FALSE))
370 410
 }
371 411
 
372
-
412
+#' @importFrom biomaRt useEnsembl getBM
373 413
 .retrieveAnnotationData <- function(genomeAssembly) {
374 414
     
375
-    futile.logger::flog.info(paste0("Retrieving genome annotation data from biomaRt... This may take a while"))
415
+    futile.logger::flog.info(paste0("Retrieving genome annotation data from biomaRt for ", genomeAssembly, "... This may take a while"))
376 416
     
377 417
     params.l = .getBiomartParameters(genomeAssembly)
378 418
     
... ...
@@ -407,7 +447,9 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
407 447
         dplyr::mutate(chromosome_name = paste0("chr", chromosome_name)) %>%
408 448
         dplyr::rename(gene.chr = chromosome_name, gene.start = start_position, gene.end = end_position, 
409 449
                       gene.strand = strand, gene.ENSEMBL = ensembl_gene_id, gene.type = gene_biotype, gene.name = external_gene_name) %>%
450
+        tidyr::replace_na(list(gene.type = "unknown")) %>%
410 451
         dplyr::mutate_if(is.character, as.factor) %>%
452
+        dplyr::mutate(gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) %>%  # there seems to be a name change from lincRNA -> lncRNA, lets change it here 
411 453
         dplyr::mutate(gene.strand = factor(gene.strand, levels = c(1,-1), labels = c("+", "-")))
412 454
     
413 455
 }
... ...
@@ -458,11 +500,11 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
458 500
                                   peak.CV = CV_peaks)
459 501
   
460 502
   GRN@annotation$consensusPeaks = metadata_peaks
461
-  
503
+
462 504
   if (!is.installed("ChIPseeker")) {
463
-    message = paste0("The package ChIPseeker is currently not installed, which is needed for additional peak annotation that can be useful for further downstream analyses. ", 
464
-                     " You may want to install it and re-run this function. However, this is optional and except for some missing additional annotation columns, there is no limitation.")
465
-    .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
505
+      packageMessage = paste0("The package ChIPseeker is currently not installed, which is needed for additional peak annotation that can be useful for further downstream analyses. ", 
506
+                              " You may want to install it and re-run this function. However, this is optional and except for some missing additional annotation columns, there is no limitation.")
507
+      .checkPackageInstallation("ChIPseeker", packageMessage, isWarning = TRUE)
466 508
   } else {
467 509
     
468 510
     futile.logger::flog.info(paste0(" Retrieve peak annotation using ChipSeeker. This may take a while"))
... ...
@@ -545,7 +587,8 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
545 587
                                 gene.mean = rowMeans_rna, 
546 588
                                 gene.median = rowMedians_rna, 
547 589
                                 gene.CV = CV_rna) %>%
548
-    dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL"))
590
+    dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>%
591
+    dplyr::mutate(gene.type = forcats::fct_explicit_na(gene.type, na_level = "unknown/missing"))
549 592
   
550 593
   GRN@annotation$genes = metadata_rna
551 594
   
... ...
@@ -560,7 +603,6 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
560 603
   
561 604
 }
562 605
 
563
-#' @importFrom IRanges width
564 606
 #' @importFrom rlang .data `:=`
565 607
 .calcGCContentPeaks <- function(GRN) {
566 608
   
... ...
@@ -582,7 +624,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
582 624
   
583 625
   GC_content.df = GC_content.df %>%
584 626
     tibble::as_tibble() %>%
585
-    dplyr::mutate(length = IRanges::width(query),
627
+    dplyr::mutate(length = GenomicRanges::width(query),
586 628
                   peak.ID = query$peakID,
587 629
                   GC_class = cut(`G|C`, breaks = seq(0,1,0.1), include.lowest = TRUE, ordered_result = TRUE))
588 630
   
... ...
@@ -611,22 +653,22 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
611 653
   GRN
612 654
 }
613 655
 
614
-#' Filter data from a \code{\linkS4class{GRN}} object
656
+#' Filter RNA-seq and/or peak data from a \code{\linkS4class{GRN}} object
615 657
 #' 
616 658
 #' @template GRN 
617
-#' @param minNormalizedMean_peaks Numeric or \code{NULL}. Default 5. Minimum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
618
-#' @param maxNormalizedMean_peaks Numeric or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
619
-#' @param minNormalizedMeanRNA Numeric or \code{NULL}. Default 5. Minimum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
620
-#' @param maxNormalizedMeanRNA Numeric or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
659
+#' @param minNormalizedMean_peaks Numeric[0,] or \code{NULL}. Default 5. Minimum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
660
+#' @param maxNormalizedMean_peaks Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a peak to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
661
+#' @param minNormalizedMeanRNA Numeric[0,] or \code{NULL}. Default 5. Minimum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
662
+#' @param maxNormalizedMeanRNA Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum mean across all samples for a gene to be retained for the normalized counts table. Set to \code{NULL} for not applying the filter.
621 663
 #' @param chrToKeep_peaks Character vector or \code{NULL}. Default \code{NULL}. Vector of chromosomes that peaks are allowed to come from. This filter can be used to filter sex chromosomes from the peaks, for example (e.g, \code{c(paste0("chr", 1:22), "chrX", "chrY")})
622
-#' @param minSize_peaks Integer or \code{NULL}. Default \code{NULL}. Minimum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter.
623
-#' @param maxSize_peaks Integer or \code{NULL}. Default 10000. Maximum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter.
624
-#' @param minCV_peaks Numeric or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter.
625
-#' @param maxCV_peaks Numeric or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter.
626
-#' @param minCV_genes Numeric or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter.
627
-#' @param maxCV_genes Numeric or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter.
664
+#' @param minSize_peaks Integer[1,] or \code{NULL}. Default \code{NULL}. Minimum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter.
665
+#' @param maxSize_peaks Integer[1,] or \code{NULL}. Default 10000. Maximum peak size (width, end - start) for a peak to be retained. Set to \code{NULL} for not applying the filter.
666
+#' @param minCV_peaks Numeric[0,] or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter.
667
+#' @param maxCV_peaks Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a peak to be retained. Set to \code{NULL} for not applying the filter.
668
+#' @param minCV_genes Numeric[0,] or \code{NULL}. Default \code{NULL}. Minimum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter.
669
+#' @param maxCV_genes Numeric[0,] or \code{NULL}. Default \code{NULL}. Maximum CV (coefficient of variation, a unitless measure of variation) for a gene to be retained. Set to \code{NULL} for not applying the filter.
628 670
 #' @template forceRerun
629
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
671
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function.
630 672
 #' @examples 
631 673
 #' # See the Workflow vignette on the GRaNIE website for examples
632 674
 #' GRN = loadExampleObject()
... ...
@@ -640,9 +682,12 @@ filterData <- function (GRN,
640 682
                         minCV_peaks = NULL, maxCV_peaks = NULL,
641 683
                         minCV_genes = NULL, maxCV_genes = NULL,
642 684
                         forceRerun = FALSE) {
643
-  GRN = .addFunctionLogToObject(GRN) 
644 685
   
686
+  start = Sys.time()
687
+    
645 688
   checkmate::assertClass(GRN, "GRN")
689
+  GRN = .addFunctionLogToObject(GRN) 
690
+
646 691
   checkmate::assertNumber(minNormalizedMean_peaks, lower = 0, null.ok = TRUE)
647 692
   checkmate::assertNumber(minNormalizedMeanRNA, lower = 0, null.ok = TRUE)
648 693
   checkmate::assertNumber(maxNormalizedMean_peaks, lower = minNormalizedMean_peaks , null.ok = TRUE)
... ...
@@ -717,6 +762,8 @@ filterData <- function (GRN,
717 762
     GRN@config$isFiltered = TRUE
718 763
   } 
719 764
   
765
+  .printExecutionTime(start, prefix = "")
766
+  
720 767
   GRN
721 768
 }
722 769
 
... ...
@@ -887,17 +934,18 @@ filterData <- function (GRN,
887 934
 #' @param TFs Character vector. Default \code{all}. Vector of TF names to include. The special keyword \code{all} can be used to include all TF found in the folder as specified by \code{motifFolder}. If \code{all} is specified anywhere, all TFs will be included. TF names must otherwise match the file names that are found in the folder, without the file suffix.
888 935
 #' @param filesTFBSPattern Character. Default \code{"_TFBS"}. Suffix for the file names in the TFBS folder that is not part of the TF name. Can be empty. For example, for the TF CTCF, if the file is called \code{CTCF.all.TFBS.bed}, set this parameter to \code{".all.TFBS"}.
889 936
 #' @param fileEnding Character. Default \code{".bed"}. File ending for the files from the motif folder.
890
-#' @param nTFMax \code{NULL} or integer. Default \code{NULL}. Maximal number of TFs to import. Can be used for testing purposes, e.g., setting to 5 only imports 5 TFs even though the whole \code{motifFolder} has many more TFs defined.
937
+#' @param nTFMax \code{NULL} or Integer[1,]. Default \code{NULL}. Maximal number of TFs to import. Can be used for testing purposes, e.g., setting to 5 only imports 5 TFs even though the whole \code{motifFolder} has many more TFs defined.
891 938
 #' @template forceRerun
892
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
939
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function (\code{GRN@data$TFs$translationTable} in particular)
893 940
 #' @examples 
894 941
 #' # See the Workflow vignette on the GRaNIE website for examples
895 942
 #' @export
896 943
 addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPattern = "_TFBS", fileEnding = ".bed", forceRerun = FALSE) {
897 944
   
898
-  GRN = .addFunctionLogToObject(GRN)
899
-  
945
+  start = Sys.time()
900 946
   checkmate::assertClass(GRN, "GRN")
947
+  GRN = .addFunctionLogToObject(GRN)
948
+
901 949
   checkmate::assertDirectoryExists(motifFolder)
902 950
   checkmate::assertCharacter(TFs, min.len = 1)
903 951
   checkmate::assert(checkmate::testNull(nTFMax), checkmate::testIntegerish(nTFMax, lower = 1))
... ...
@@ -927,6 +975,8 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte
927 975
     
928 976
   } 
929 977
   
978
+  .printExecutionTime(start, prefix = "")
979
+  
930 980
   GRN
931 981
   
932 982
 }
... ...
@@ -1006,7 +1056,7 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte
1006 1056
 #' @template GRN
1007 1057
 #' @template nCores
1008 1058
 #' @template forceRerun
1009
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. 
1059
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function (\code{GRN@data$TFs$TF_peak_overlap} in particular)
1010 1060
 #' @examples 
1011 1061
 #' # See the Workflow vignette on the GRaNIE website for examples
1012 1062
 #' GRN = loadExampleObject()
... ...
@@ -1014,9 +1064,11 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte
1014 1064
 #' @export
1015 1065
 overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1016 1066
   
1067
+  start = Sys.time()
1068
+    
1069
+  checkmate::assertClass(GRN, "GRN")
1017 1070
   GRN = .addFunctionLogToObject(GRN)
1018 1071
   
1019
-  checkmate::assertClass(GRN, "GRN")
1020 1072
   checkmate::assertIntegerish(nCores, lower = 1)
1021 1073
   checkmate::assertFlag(forceRerun)
1022 1074
   
... ...
@@ -1092,6 +1144,8 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1092 1144
     
1093 1145
   } 
1094 1146
   
1147
+  .printExecutionTime(start, prefix = "")
1148
+  
1095 1149
   GRN
1096 1150
 }
1097 1151
 
... ...
@@ -1140,7 +1194,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1140 1194
     dplyr::group_by(peakID) %>%
1141 1195
     dplyr::slice(which.min(distance)) %>%
1142 1196
     #arrange(distance, .by_group = TRUE) %>%
1143
-    # top_n(n = 2, desc(distance)) %>%
1197
+    # top_n(n = 2, dplyr::desc(distance)) %>%
1144 1198
     dplyr::ungroup()
1145 1199
   
1146 1200
   futile.logger::flog.info(paste0(" Calculating intersection for TF ", TFCur, " finished. Number of overlapping TFBS after filtering: ", nrow(final.df)))
... ...
@@ -1226,7 +1280,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1226 1280
   # Reorder to make sure the order is the same. Due to the duplication ID issue, the number of columns may increase after the column selection
1227 1281
   
1228 1282
   # Some columns may be removed here due to zero standard deviation
1229
-  HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(ENSEMBL %in% colnames(sort.cor.m))
1283
+  HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(.data$ENSEMBL %in% colnames(sort.cor.m))
1230 1284
   
1231 1285
   sort.cor.m = sort.cor.m[,as.character(HOCOMOCO_mapping.exp.filt$ENSEMBL)] 
1232 1286
   colnames(sort.cor.m) = as.character(HOCOMOCO_mapping.exp.filt$HOCOID)
... ...
@@ -1239,8 +1293,8 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1239 1293
   
1240 1294
   peak_TF_overlapCur.df = .asMatrixFromSparse(GRN@data$TFs$TF_peak_overlap, convertZero_to_NA = FALSE) %>% 
1241 1295
     tibble::as_tibble() %>%
1242
-    dplyr::filter(!isFiltered) %>% 
1243
-    dplyr::select(-isFiltered) 
1296
+    dplyr::filter(!.data$isFiltered) %>% 
1297
+    dplyr::select(-.data$isFiltered) 
1244 1298
   
1245 1299
   if (perm > 0 & shuffle) {
1246 1300
     peak_TF_overlapCur.df = .shuffleRowsPerColumn(peak_TF_overlapCur.df)
... ...
@@ -1257,18 +1311,22 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1257 1311
 
1258 1312
 #' Add TF activity data to GRN object using a simplified procedure for estimating it. EXPERIMENTAL.
1259 1313
 #' 
1314
+#' We do not yet provide full support for this function. It is currently being tested. Use at our own risk.
1315
+#' 
1260 1316
 #' @template GRN
1261 1317
 #' @template normalization_TFActivity
1262 1318
 #' @template name_TFActivity
1263 1319
 #' @template forceRerun
1264
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function. 
1320
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function
1321
+#' (\code{GRN@data$TFs[[name]]} in particular, with \code{name} referring to the value of tje \code{name} parameter) 
1265 1322
 
1266 1323
 addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_activity", forceRerun = FALSE) {
1267 1324
   
1325
+  checkmate::assertClass(GRN, "GRN")
1268 1326
   GRN = .addFunctionLogToObject(GRN)
1269 1327
   start = Sys.time()
1270 1328
   
1271
-  checkmate::assertClass(GRN, "GRN")
1329
+  
1272 1330
   checkmate::assertChoice(normalization, c("cyclicLoess", "sizeFactors", "quantile", "none"))
1273 1331
   checkmate::assertCharacter(name, min.chars = 1, len = 1)
1274 1332
   checkmate::assertFlag(forceRerun)
... ...
@@ -1365,6 +1423,11 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac
1365 1423
   if (normalization == "cyclicLoess") {
1366 1424
     
1367 1425
     futile.logger::flog.info(paste0(" Normalizing data using cyclic LOESS"))
1426
+      
1427
+      
1428
+    packageMessage = paste0("The package csaw is not installed but required for the cyclic LOESS normalization. Please install it and re-run this function or change the normalization type (if possible).")
1429
+    .checkPackageInstallation("csaw", packageMessage)   
1430
+      
1368 1431
     # Perform a cyclic loess normalization
1369 1432
     # We use a slighlty more complicated setup to derive size factors for library normalization
1370 1433
     # Instead of just determining the size factors in DeSeq2 via cirtual samples, we use 
... ...
@@ -1413,6 +1476,8 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac
1413 1476
 
1414 1477
 #' Import externally derived TF Activity data. EXPERIMENTAL.
1415 1478
 #' 
1479
+#' We do not yet provide full support for this function. It is currently being tested. Use at our own risk.
1480
+#' 
1416 1481
 #' @template GRN
1417 1482
 #' @param data Data frame. No default. Data with TF data.
1418 1483
 #' @template name_TFActivity
... ...
@@ -1420,18 +1485,19 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac
1420 1485
 #' @param nameColumn Character. Default \code{TF.name}. Must be unique for each TF / row.
1421 1486
 #' @template normalization_TFActivity
1422 1487
 #' @template forceRerun
1423
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.  
1488
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function.  
1424 1489
 importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF.name", normalization = "none", forceRerun = FALSE) {
1425 1490
   
1491
+  checkmate::assertClass(GRN, "GRN")
1426 1492
   GRN = .addFunctionLogToObject(GRN)
1427 1493
   
1428 1494
   start = Sys.time()
1429 1495
   
1430
-  checkmate::assertClass(GRN, "GRN")
1496
+  
1431 1497
   checkmate::assertDataFrame(data, min.cols = 2, min.rows = 1)
1432
-  checkmate::assertSubset(idColumn, colnames(data))
1433
-  checkmate::assertSubset(nameColumn, colnames(data))
1434
-  checkmate::assertSubset(GRN@config$sharedSamples, colnames(data))
1498
+  checkmate::assertChoice(idColumn, colnames(data))
1499
+  checkmate::assertChoice(nameColumn, colnames(data))
1500
+  checkmate::assertSubset(GRN@config$sharedSamples, colnames(data), empty.ok = FALSE)
1435 1501
   checkmate::assertCharacter(name, min.chars = 1, any.missing = FALSE, len = 1)
1436 1502
   checkmate::assertChoice(normalization, c("cyclicLoess", "sizeFactors", "quantile", "none"))
1437 1503
   
... ...
@@ -1482,7 +1548,7 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF
1482 1548
     countsNorm$ENSEMBL = data$ENSEMBL
1483 1549
     
1484 1550
     nRowBefore = nrow(countsNorm)
1485
-    countsNorm.subset = dplyr::filter(countsNorm, ENSEMBL %in% GRN@data$TFs$translationTable$ENSEMBL)
1551
+    countsNorm.subset = dplyr::filter(countsNorm, .data$ENSEMBL %in% GRN@data$TFs$translationTable$ENSEMBL)
1486 1552
     nRowAfter = nrow(countsNorm.subset)
1487 1553
     if (nRowAfter == 0) {
1488 1554
       message = "No rows overlapping with translation table, check ENSEMBL IDs."
... ...
@@ -1535,14 +1601,14 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF
1535 1601
 #' Run the activator-repressor classification for the TFs for a \code{\linkS4class{GRN}} object
1536 1602
 #' 
1537 1603
 #' @template GRN
1538
-#' @param significanceThreshold_Wilcoxon Numeric between 0 and 1. Default 0.05. Significance threshold for Wilcoxon test that is run in the end for the final classification. See the Vignette and *diffTF* paper for details.
1539
-#' @param plot_minNoTFBS_heatmap Integer. Default 100. Minimum number of TFBS for a TF to be included in the heatmap that is part of the output of this function.
1604
+#' @param significanceThreshold_Wilcoxon Numeric[0,1]. Default 0.05. Significance threshold for Wilcoxon test that is run in the end for the final classification. See the Vignette and *diffTF* paper for details.
1605
+#' @param plot_minNoTFBS_heatmap Integer[1,]. Default 100. Minimum number of TFBS for a TF to be included in the heatmap that is part of the output of this function.
1540 1606
 #' @param deleteIntermediateData \code{TRUE} or \code{FALSE}.  Default \code{TRUE}. Should intermediate data be deleted before returning the object after a successful run? Due to the size of the produced intermediate data, we recommend setting this to \code{TRUE}, but if memory or object size are not an issue, the information can also be kept.
1541 1607
 #' @template plotDiagnosticPlots
1542 1608
 #' @template outputFolder
1543 1609
 #' @template corMethod
1544 1610
 #' @template forceRerun
1545
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.  
1611
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. 
1546 1612
 #' @examples 
1547 1613
 #' # See the Workflow vignette on the GRaNIE website for examples
1548 1614
 #' # GRN = loadExampleObject()
... ...
@@ -1554,9 +1620,12 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1554 1620
                                       corMethod = "pearson",
1555 1621
                                       forceRerun = FALSE) {
1556 1622
   
1623
+  start = Sys.time()
1624
+    
1625
+  checkmate::assertClass(GRN, "GRN")
1557 1626
   GRN = .addFunctionLogToObject(GRN)
1558 1627
   
1559
-  checkmate::assertClass(GRN, "GRN")
1628
+  
1560 1629
   checkmate::assertNumber(significanceThreshold_Wilcoxon, lower = 0, upper = 1)
1561 1630
   checkmate::assertNumber(plot_minNoTFBS_heatmap, lower = 1)
1562 1631
   checkmate::assertFlag(deleteIntermediateData)
... ...
@@ -1730,6 +1799,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1730 1799
     
1731 1800
   } # end of for each connection type
1732 1801
   
1802
+  .printExecutionTime(start, prefix = "")
1733 1803
   
1734 1804
   GRN
1735 1805
   
... ...
@@ -1753,6 +1823,8 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1753 1823
 
1754 1824
 #' Add TF-peak connections to a \code{\linkS4class{GRN}} object
1755 1825
 #' 
1826
+#' After the execution of this function, QC plots can be plotted with the function \code{\link{plotDiagnosticPlots_TFPeaks}} unless this has already been done by default due to \code{plotDiagnosticPlots = TRUE}
1827
+#' 
1756 1828
 #' @template GRN 
1757 1829
 #' @template plotDiagnosticPlots
1758 1830
 #' @template plotDetails
... ...
@@ -1760,13 +1832,14 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1760 1832
 #' @template corMethod
1761 1833
 #' @param connectionTypes Character vector. Default \code{expression}. Vector of connection types to include for the TF-peak connections. If an additional connection type is specified here, it has to be available already within the object (EXPERIMENTAL). See the function \code{\link{addData_TFActivity}} for details.
1762 1834
 #' @param removeNegativeCorrelation  Vector of \code{TRUE} or \code{FALSE}. Default \code{FALSE}. EXPERIMENTAL. Must be a logical vector of the same length as the parameter \code{connectionType}. Should negatively correlated TF-peak connections be removed for the specific connection type? For connection type expression, the default is \code{FALSE}, while for any TF Activity related connection type, we recommend setting this to \code{TRUE}.  
1763
-#' @param maxFDRToStore Numeric. Default 0.3. Maximum TF-peak FDR value to permanently store a particular TF-peak connection in the object? This parameter has a large influence on the overall memory size of the object, and we recommend not storing connections with a high FDR due to their sheer number.
1835
+#' @param maxFDRToStore Numeric[0,1]. Default 0.3. Maximum TF-peak FDR value to permanently store a particular TF-peak connection in the object? This parameter has a large influence on the overall memory size of the object, and we recommend not storing connections with a high FDR due to their sheer number.
1764 1836
 #' @param addForPermuted \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Add connections also for permuted data. Leave at \code{TRUE} unless you know what you are doing.
1765 1837
 #' @param useGCCorrection \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. EXPERIMENTAL. Should a GC-matched background be used when calculating FDRs?
1766
-#' @param percBackground_size Numeric (0 to 100). Default 75. EXPERIMENTAL. Description will follow. Only relevant if \code{useGCCorrection} is set to \code{TRUE}, ignored otherwise.
1838
+#' @param percBackground_size Numeric[0,100]. Default 75. EXPERIMENTAL. Description will follow. Only relevant if \code{useGCCorrection} is set to \code{TRUE}, ignored otherwise.
1767 1839
 #' @param percBackground_resample \code{TRUE} or \code{FALSE}.  Default \code{TRUE}. EXPERIMENTAL. Should resampling be enabled for those GC bins for which not enough background peaks are available?. Only relevant if \code{useGCCorrection} is set to \code{TRUE}, ignored otherwise.
1768 1840
 #' @template forceRerun
1769
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
1841
+#' @seealso \code{\link{plotDiagnosticPlots_TFPeaks}}
1842
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. 
1770 1843
 #' @examples 
1771 1844
 #' # See the Workflow vignette on the GRaNIE website for examples
1772 1845
 #' GRN = loadExampleObject()
... ...
@@ -1781,16 +1854,18 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
1781 1854
                                     useGCCorrection = FALSE, percBackground_size = 75, percBackground_resample = TRUE,
1782 1855
                                     forceRerun = FALSE) {
1783 1856
   
1784
-  GRN = .addFunctionLogToObject(GRN)
1785
-  
1857
+  start = Sys.time()
1858
+
1786 1859
   checkmate::assertClass(GRN, "GRN")
1860
+  GRN = .addFunctionLogToObject(GRN)
1861
+
1787 1862
   checkmate::assertFlag(plotDiagnosticPlots)
1788 1863
   checkmate::assertFlag(plotDetails)
1789 1864
   checkmate::assertChoice(corMethod, c("pearson", "spearman"))
1790 1865
   checkmate::assertFlag(addForPermuted)
1791 1866
   
1792 1867
   GRN = .checkAndUpdateConnectionTypes(GRN) # For compatibility with older versions
1793
-  checkmate::assertSubset(connectionTypes, GRN@config$TF_peak_connectionTypes)
1868
+  checkmate::assertSubset(connectionTypes, GRN@config$TF_peak_connectionTypes, empty.ok = FALSE)
1794 1869
   
1795 1870
   checkmate::assertLogical(removeNegativeCorrelation, any.missing = FALSE, len = length(connectionTypes))
1796 1871
   
... ...
@@ -1852,6 +1927,8 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
1852 1927
     
1853 1928
   }
1854 1929
   
1930
+  .printExecutionTime(start, prefix = "")
1931
+  
1855 1932
   GRN
1856 1933
 }
1857 1934
 
... ...
@@ -1928,7 +2005,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
1928 2005
     
1929 2006
     pb <- progress::progress_bar$new(total = length(allTF))
1930 2007
     
1931
-    peaksFiltered = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered) 
2008
+    peaksFiltered = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered) 
1932 2009
     
1933 2010
     if (!useGCCorrection) {
1934 2011
       minPerc = 100
... ...
@@ -2036,7 +2113,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2036 2113
         peakIDsSel = c()
2037 2114
         for (i in seq_len(nrow(GC_classes_foreground.df))) {
2038 2115
           
2039
-          peaksBackgroundGCCur =  peaksBackground %>% dplyr::filter(GC_class == GC_classes_foreground.df$GC_class[i])
2116
+          peaksBackgroundGCCur =  peaksBackground %>% dplyr::filter(.data$GC_class == GC_classes_foreground.df$GC_class[i])
2040 2117
           
2041 2118
           if (nrow( peaksBackgroundGCCur) == 0) {
2042 2119
             next
... ...
@@ -2180,7 +2257,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2180 2257
         if (connectionTypeCur %in% connectionTypes_removeNegCor ) {
2181 2258
           
2182 2259
           # futile.logger::flog.info(paste0(" Remove negatively correlated TF-peak pairs for connection type ", connectionTypeCur))
2183
-          cor.peak.tf = dplyr::filter(cor.peak.tf, TF_peak.r >= 0)
2260
+          cor.peak.tf = dplyr::filter(cor.peak.tf, .data$TF_peak.r >= 0)
2184 2261
           
2185 2262
         }
2186 2263
         
... ...
@@ -2218,10 +2295,10 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2218 2295
         # DISCARD other rows altogether
2219 2296
         # Left join here is what we want, as we need this df only for "real" data
2220 2297
         tblFilt.df = dplyr::left_join(cor.peak.tf, fdr.curve, by = "TF_peak.r_bin") %>%
2221
-          dplyr::filter(TF_peak.fdr <= maxFDRToStore) %>%
2222
-          dplyr::select(TF.name, TF_peak.r_bin, TF_peak.r, TF_peak.fdr, 
2223
-                        TF_peak.fdr_orig, peak.ID, TF_peak.fdr_direction, 
2224
-                        TF_peak.connectionType, tidyselect::contains("value"))
2298
+          dplyr::filter(.data$TF_peak.fdr <= maxFDRToStore) %>%
2299
+          dplyr::select(.data$TF.name, .data$TF_peak.r_bin, .data$TF_peak.r, .data$TF_peak.fdr, 
2300
+                        .data$TF_peak.fdr_orig, .data$peak.ID, .data$TF_peak.fdr_direction, 
2301
+                        .data$TF_peak.connectionType, tidyselect::contains("value"))
2225 2302
         
2226 2303
         
2227 2304
         if (!plotDetails) {
... ...
@@ -2277,7 +2354,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2277 2354
       requiredNoPeaks = round(n_rel * targetNoPeaks, 0)
2278 2355
       # Check in background
2279 2356
       availableNoPeaks = GC_classes_background.df %>% 
2280
-        dplyr::filter(GC_class == GC_class.cur) %>%
2357
+        dplyr::filter(.data$GC_class == GC_class.cur) %>%
2281 2358
         dplyr::pull(.data$n)
2282 2359
       #futile.logger::flog.info(paste0(" GC.class ", GC.class.cur, ": Required: ", requiredNoPeaks, ", available: ", availableNoPeaks))
2283 2360
       if ( availableNoPeaks < requiredNoPeaks) {
... ...
@@ -2309,6 +2386,8 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2309 2386
 
2310 2387
 #' Add peak-gene connections to a \code{\linkS4class{GRN}} object
2311 2388
 #' 
2389
+#' After the execution of this function, QC plots can be plotted with the function \code{\link{plotDiagnosticPlots_peakGene}} unless this has already been done by default due to \code{plotDiagnosticPlots = TRUE}
2390
+#' 
2312 2391
 #' @export
2313 2392
 #' @template GRN
2314 2393
 #' @param  overlapTypeGene Character. \code{"TSS"} or \code{"full"}. Default \code{"TSS"}. If set to \code{"TSS"}, only the TSS of the gene is used as reference for finding genes in the neighborhood of a peak. If set to \code{"full"}, the whole annotated gene (including all exons and introns) is used instead. 
... ...
@@ -2317,11 +2396,12 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2317 2396
 #' @param TADs Data frame with TAD domains. Default \code{NULL}. If provided, the neighborhood of a peak is defined by the TAD domain the peak is in rather than a fixed-sized neighborhood. The expected format is a BED-like data frame with at least 3 columns in this particular order: chromosome, start, end, the 4th column is optional and will be taken as ID column. All additional columns as well as column names are ignored. For the first 3 columns, the type is checked as part of a data integrity check.
2318 2397
 #' @template nCores
2319 2398
 #' @template plotDiagnosticPlots
2320
-#' @param plotGeneTypes List of character vectors. Default \code{list(c("all"), c("protein_coding"), c("protein_coding", "lincRNA"))}. Each list element may consist of one or multiple gene types that are plotted collectively in one PDF. The special keyword \code{"all"} denotes all gene types that are found (be aware: this typically contains 20+ gene types, see \url{https://www.gencodegenes.org/pages/biotypes.html} for details).
2399
+#' @param plotGeneTypes List of character vectors. Default \code{list(c("all"), c("protein_coding"))}. Each list element may consist of one or multiple gene types that are plotted collectively in one PDF. The special keyword \code{"all"} denotes all gene types that are found (be aware: this typically contains 20+ gene types, see \url{https://www.gencodegenes.org/pages/biotypes.html} for details).
2321 2400
 #' @template outputFolder
2322 2401
 #' @template addRobustRegression
2323 2402
 #' @template forceRerun
2324
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function in different flavors.
2403
+#' @seealso \code{\link{plotDiagnosticPlots_peakGene}}
2404
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. 
2325 2405
 #' @examples 
2326 2406
 #' # See the Workflow vignette on the GRaNIE website for examples
2327 2407
 #' GRN = loadExampleObject()
... ...
@@ -2330,25 +2410,36 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2330 2410
                                      promoterRange = 250000, TADs = NULL,
2331 2411
                                      nCores = 4, 
2332 2412
                                      plotDiagnosticPlots = TRUE, 
2333
-                                     plotGeneTypes = list(c("all"), c("protein_coding"), c("protein_coding", "lincRNA")), 
2413
+                                     plotGeneTypes = list(c("all"), c("protein_coding")), 
2334 2414
                                      outputFolder = NULL,
2335 2415
                                      addRobustRegression = FALSE,
2336 2416
                                      forceRerun = FALSE) {
2337 2417
   
2418
+  start = Sys.time() 
2419
+    
2420
+  checkmate::assertClass(GRN, "GRN")
2338 2421
   GRN = .addFunctionLogToObject(GRN)
2339 2422
   
2340
-  checkmate::assertClass(GRN, "GRN")
2423
+  
2341 2424
   checkmate::assertChoice(overlapTypeGene, c("TSS", "full"))
2342 2425
   checkmate::assertChoice(corMethod, c("pearson", "spearman"))
2343 2426
   checkmate::assertIntegerish(promoterRange, lower = 0)
2344 2427
   checkmate::assert(checkmate::testNull(TADs), checkmate::testDataFrame(TADs))
2345 2428
   checkmate::assertIntegerish(nCores, lower = 1)
2346 2429
   checkmate::assertFlag(plotDiagnosticPlots) 
2347
-  checkmate::assertList(plotGeneTypes, any.missing = FALSE, min.len = 1, types = "character")
2430
+  for (elemCur in plotGeneTypes) {
2431
+      checkmate::assertSubset(elemCur, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
2432
+  }
2433
+  
2348 2434
   checkmate::assert(checkmate::testNull(outputFolder), checkmate::testDirectoryExists(outputFolder))
2349 2435
   checkmate::assertFlag(addRobustRegression)
2350 2436
   checkmate::assertFlag(forceRerun)
2351 2437
   
2438
+  if (addRobustRegression) {
2439
+      packageMessage = paste0("The package robust is not installed, but needed here due to addRobustRegression = TRUE. Please install it and re-run this function or change addRobustRegression to FALSE.")
2440
+      .checkPackageInstallation("robust", packageMessage)  
2441
+  }
2442
+  
2352 2443
   # As this is independent of the underlying GRN, it has to be done only once
2353 2444
   
2354 2445
   if (is.null(GRN@connections$peak_genes[["0"]]) | forceRerun) {
... ...
@@ -2362,7 +2453,8 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2362 2453
     
2363 2454
     # Check which gene types are available for the particular genome annotation
2364 2455
     # Use all of them to collect statistics. Filtering can be done later
2365
-    gene.types = unique(GRN@annotation$genes$gene.type)
2456
+    # Just remove NA
2457
+    gene.types = unique(GRN@annotation$genes$gene.type) %>% stats::na.omit()
2366 2458
     
2367 2459
     
2368 2460
     for (permutationCur in 0:.getMaxPermutation(GRN)) {
... ...
@@ -2381,7 +2473,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2381 2473
         .calculatePeakGeneCorrelations(GRN, permutationCur,
2382 2474
                                        TADs = TADs,
2383 2475
                                        neighborhoodSize = promoterRange,
2384
-                                       gene.types = names(gene.types),
2476
+                                       gene.types = as.character(gene.types),
2385 2477
                                        corMethod = corMethod,
2386 2478
                                        randomizePeakGeneConnections = randomizePeakGeneConnections,
2387 2479
                                        overlapTypeGene,
... ...
@@ -2400,10 +2492,10 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2400 2492
     
2401 2493
   }
2402 2494
   
2403
-  GRN
2404
-  
2405
-  
2495
+  .printExecutionTime(start, prefix = "")
2406 2496
   
2497
+  GRN
2498
+
2407 2499
 }
2408 2500
 
2409 2501
 .calculatePeakGeneOverlaps <- function(GRN, allPeaks, peak_TAD_mapping = NULL, par.l, neighborhoodSize = 250000, genomeAssembly, 
... ...
@@ -2470,7 +2562,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2470 2562
   subject = .getKnownGeneAnnotationNew(GRN, gene.types = gene.types, extendRegions = NULL)
2471 2563
   
2472 2564
   requiredColnames = c("gene.ENSEMBL","gene.type", "gene.name")
2473
-  checkmate::assertSubset(requiredColnames, colnames(subject))
2565
+  checkmate::assertSubset(requiredColnames, colnames(subject), empty.ok = FALSE)
2474 2566
   
2475 2567
   subject.gr = GenomicRanges::makeGRangesFromDataFrame(subject, keep.extra.columns = TRUE)
2476 2568
   
... ...
@@ -2517,7 +2609,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2517 2609
                                                         TRUE ~ pmin(abs(orig_peak_start - gene.start), abs(orig_peak_end - gene.start))))
2518 2610
   
2519 2611
   if (removeEnsemblNA) {
2520
-    overlaps.sub.df = dplyr::filter(overlaps.sub.df, !is.na(gene.ENSEMBL))
2612
+    overlaps.sub.df = dplyr::filter(overlaps.sub.df, !is.na(.data$gene.ENSEMBL))
2521 2613
   }
2522 2614
   
2523 2615
   .printExecutionTime(start)
... ...
@@ -2532,7 +2624,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2532 2624
                                            TADs = NULL, 
2533 2625
                                            mergeOverlappingTADs = FALSE, 
2534 2626
                                            neighborhoodSize = 250000,
2535
-                                           gene.types = c("protein_coding", "lincRNA"),
2627
+                                           gene.types = c("protein_coding"),
2536 2628
                                            overlapTypeGene = "TSS",
2537 2629
                                            corMethod = "pearson",
2538 2630
                                            randomizePeakGeneConnections = FALSE, 
... ...
@@ -2545,7 +2637,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2545 2637
   
2546 2638
   genomeAssembly = GRN@config$parameters$genomeAssembly
2547 2639
   
2548
-  checkmate::assertSubset(gene.types, c("all", names(.getAllGeneTypesAndFrequencies(GRN@config$parameters$genomeAssembly, verbose = FALSE))))
2640
+  # checkmate::assertSubset(gene.types, c("all", names(.getAllGeneTypesAndFrequencies(GRN@config$parameters$genomeAssembly, verbose = FALSE))), empty.ok = FALSE)
2549 2641
   
2550 2642
   if (is.null(nCores)) {
2551 2643
     nCores = 1
... ...
@@ -2554,7 +2646,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2554 2646
   
2555 2647
   #consensusPeaks = .createDF_fromCoordinates(getCounts(GRN, type = "peaks", norm = TRUE, 0)$peakID, "peakID")
2556 2648
   
2557
-  consensusPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered)
2649
+  consensusPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered)
2558 2650
   # Preprocess TAD boundaries
2559 2651
   if (!is.null(TADs)) {
2560 2652
     
... ...
@@ -2786,6 +2878,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2786 2878
     res.m[rowCur, "r"]     = res$estimate
2787 2879
     
2788 2880
     if (addRobustRegression) {
2881
+        
2789 2882
       lmRob.sum = tryCatch( {
2790 2883
         summary(robust::lmRob(data1 ~data2))
2791 2884
         
... ...
@@ -2838,18 +2931,29 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2838 2931
 
2839 2932
 
2840 2933
 
2841
-#' Filter the GRN and integrate peak-gene connections. 
2934
+#' Filter TF-peaks and peak-gene connections and combine them to TF-peak-gene connections to construct an eGRN.
2842 2935
 #' 
2843
-#' This is one of the main integrative functions of the \code{GRN} package. It has two main functions: Filtering the TF-peak and peak-gene connections that have been identified before, and combining the 3 major elements (TFs, peaks, genes) into one data frame, with one row per connection. Here, a connection can either be a TF-peak, peak-gene or TF-peak-gene link, depending on the parameters. 
2844
-#' Internally, first, the TF-peak are filtered before the peak-gene connections are added for reasons of memory and computational efficacy: It takes a lot of time and particularly space to connect the full GRN with all peak-gene connections - as most of the links have weak support (i.e., high FDR), first filtering out unwanted links dramatically reduces the memory needed for the combined GRN 
2936
+#' This is one of the main integrative functions of the \code{GRaNIE} package. It has two main functions: 
2937
+#' First, filtering both TF-peak and peak-gene connections according to different criteria such as FDR and other properties 
2938
+#' Second, joining the three major elements that an eGRN consist of (TFs, peaks, genes) into one data frame, with one row per unique TF-peak-gene connection.
2939
+#' \strong{Note that the eGRN graph is reset upon successful execution of this function, and re-running the function \code{\link{build_eGRN_graph}} 
2940
+#' is necessary when any of the network functions of the package shall be executed.}
2941
+
2942
+#' Internally, before joining them, both TF-peak links and peak-gene connections are filtered separately for reasons of memory and computational efficacy:
2943
+#' First filtering out unwanted links dramatically reduces the memory needed for the full eGRN. Peak-gene p-value adjustment is only done after all filtering steps on the remaining set of
2944
+#' connections to lower the statistical burden of multiple-testing adjustment; therefore, this may lead to initially counter-intuitive effects such as a particular connections not being included anymore as compared to a 
2945
+#' filtering based on different thresholds, or the FDR being different for the same reason.
2845 2946
 #' @template GRN
2846 2947
 #' @param TF_peak.fdr.threshold Numeric[0,1]. Default 0.2. Maximum FDR for the TF-peak links. Set to 1 or NULL to disable this filter.
2847 2948
 #' @template TF_peak.connectionTypes
2848 2949
 #' @param peak_gene.p_raw.threshold Numeric[0,1]. Default NULL. Threshold for the peak-gene connections, based on the raw p-value. All peak-gene connections with a larger raw p-value will be filtered out.
2849 2950
 #' @param peak_gene.fdr.threshold Numeric[0,1]. Default 0.2. Threshold for the peak-gene connections, based on the FDR. All peak-gene connections with a larger FDR will be filtered out.
2850
-#' @param peak_gene.fdr.method Character. Default "BH". One of: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW". Method for adjusting p-values for multiple testing. If set to "IHW", independent hypothesis weighting will be performed, and a suitable covariate has to be specified for the parameter \code{peak_gene.IHW.covariate}.
2951
+#' @param peak_gene.fdr.method Character. Default "BH". One of: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW". 
2952
+#' Method for adjusting p-values for multiple testing. 
2953
+#' If set to "IHW", the package \code{IHW} is required (as it is listed under \code{Suggests}, it may not be installed), 
2954
+#' and independent hypothesis weighting will be performed, and a suitable covariate has to be specified for the parameter \code{peak_gene.IHW.covariate}.
2851 2955
 #' @param peak_gene.IHW.covariate Character. Default NULL. Name of the covariate to use for IHW (column name from the table thatis returned with the function \code{getGRNConnections}. Only relevant if \code{peak_gene.fdr.method} is set to "IHW". You have to make sure the specified covariate is suitable or IHW, see the diagnostic plots that are generated in this function for this. For many datasets, the peak-gene distance (called \code{peak_gene.distance} in the object) seems suitable.
2852
-#' @param peak_gene.IHW.nbins Integer or "auto". Default 5. Number of bins for IHW. Only relevant if \code{peak_gene.fdr.method} is set to "IHW".
2956
+#' @param peak_gene.IHW.nbins Integer or "auto". Default "auto". Number of bins for IHW. Only relevant if \code{peak_gene.fdr.method} is set to "IHW".
2853 2957
 #' @template gene.types
2854 2958
 #' @param allowMissingTFs \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Should connections be returned for which the TF is NA (i.e., connections consisting only of peak-gene links?). If set to \code{TRUE}, this generally greatly increases the number of connections but it may not be what you aim for.
2855 2959
 #' @param allowMissingGenes \code{TRUE} or \code{FALSE}.  Default \code{TRUE}. Should connections be returned for which the gene is NA (i.e., connections consisting only of TF-peak links?). If set to \code{TRUE}, this generally increases the number of connections.
... ...
@@ -2863,7 +2967,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2863 2967
 #' @param silent \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Print progress messages and filter statistics.
2864 2968
 #' @param filterLoops  \code{TRUE} or \code{FALSE}. Default \code{TRUE}. If a TF regulates itself (i.e., the TF and the gene are the same entity), should such loops be filtered from the GRN?
2865 2969
 #' @template outputFolder
2866
-#' @return The same \code{\linkS4class{GRN}} object, with the filtered and merged TF-peak and peak-gene connections in the slot connections$all.filtered.
2970
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function. The filtered and merged TF-peak and peak-gene connections in the slot \code{GRN@connections$all.filtered}.
2867 2971
 #' @examples 
2868 2972
 #' # See the Workflow vignette on the GRaNIE website for examples
2869 2973
 #' GRN = loadExampleObject()
... ...
@@ -2871,6 +2975,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2871 2975
 #' @seealso \code{\link{visualizeGRN}}
2872 2976
 #' @seealso \code{\link{addConnections_TF_peak}} 
2873 2977
 #' @seealso \code{\link{addConnections_peak_gene}} 
2978
+#' @seealso \code{\link{build_eGRN_graph}} 
2874 2979
 #' @importFrom rlang .data
2875 2980
 #' @importFrom magrittr `%>%`
2876 2981
 #' @export
... ...
@@ -2881,8 +2986,8 @@ filterGRNAndConnectGenes <- function(GRN,
2881 2986
                                      peak_gene.fdr.threshold= 0.2,
2882 2987
                                      peak_gene.fdr.method = "BH",
2883 2988
                                      peak_gene.IHW.covariate = NULL,
2884
-                                     peak_gene.IHW.nbins = 5,
2885
-                                     gene.types = c("protein_coding", "lincRNA"), 
2989
+                                     peak_gene.IHW.nbins = "auto",
2990
+                                     gene.types = c("protein_coding"), 
2886 2991
                                      allowMissingTFs = FALSE, allowMissingGenes = TRUE,
2887 2992
                                      peak_gene.r_range = c(0,1), 
2888 2993
                                      peak_gene.selection = "all",
... ...
@@ -2893,28 +2998,36 @@ filterGRNAndConnectGenes <- function(GRN,
2893 2998
                                      outputFolder = NULL,
2894 2999
                                      silent = FALSE) {
2895 3000
   
2896
-  
3001
+  start = Sys.time()  
3002
+
3003
+  checkmate::assertClass(GRN, "GRN")
2897 3004
   GRN = .addFunctionLogToObject(GRN)
2898 3005
   
2899
-  checkmate::assertClass(GRN, "GRN")
3006
+  
2900 3007
   checkmate::assertCharacter(TF_peak.connectionTypes, min.len = 1, any.missing = FALSE)
2901 3008
   checkmate::assert(checkmate::checkNull(peak_gene.p_raw.threshold), checkmate::checkNumber(peak_gene.p_raw.threshold, lower = 0, upper = 1))
2902 3009
   checkmate::assertNumeric(peak_gene.r_range, lower = -1, upper = 1, len = 2)
2903
-  checkmate::assertCharacter(gene.types, min.len = 1)
3010
+  checkmate::assertSubset(gene.types, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
2904 3011
   checkmate::assertNumber(TF_peak.fdr.threshold, lower = 0, upper = 1)
2905 3012
   checkmate::assert(checkmate::checkNull(peak_gene.fdr.threshold), checkmate::checkNumber(peak_gene.fdr.threshold, lower = 0, upper = 1))
2906 3013
   
2907
-  checkmate::assertSubset(peak_gene.fdr.method, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW"))
3014
+  checkmate::assertChoice(peak_gene.fdr.method, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none", "IHW"))
2908 3015
   checkmate::assert(checkmate::checkNull(peak_gene.IHW.covariate), checkmate::checkCharacter(peak_gene.IHW.covariate, min.chars = 1, len = 1))
3016
+  
3017
+  checkmate::assert(checkIntegerish(peak_gene.IHW.nbins, lower = 1), checkSubset(peak_gene.IHW.nbins, "auto"))
3018
+  
2909 3019
   checkmate::assertFlag(silent)
2910 3020
   checkmate::assertChoice(peak_gene.selection, c("all", "closest"))
2911 3021
   checkmate::assertFlag(allowMissingTFs)
2912 3022
   checkmate::assertFlag(allowMissingGenes)
3023
+  
3024
+  checkmate::assertFlag(TF_peak_FDR_selectViaCorBins)
2913 3025
   checkmate::assertFlag(filterLoops)
2914 3026
   
2915 3027
   if (peak_gene.fdr.method == "IHW" & !is.installed("IHW")) {
2916
-    message = "IHW has been selected for p-value adjustment, but IHW is currently not installed. Please install it and re-run the function or choose a different method."
2917
-    .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
3028
+    packageMessage = "IHW has been selected for p-value adjustment, but IHW is currently not installed. Please install it and re-run the function or choose a different method."
3029
+    .checkPackageInstallation("IHW", packageMessage)  
3030
+
2918 3031
   }
2919 3032
   
2920 3033
   if (!is.null(peak_gene.p_raw.threshold) & !is.null(peak_gene.fdr.threshold)) {
... ...
@@ -2922,7 +3035,6 @@ filterGRNAndConnectGenes <- function(GRN,
2922 3035
     .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
2923 3036
   }
2924 3037
   
2925
-  start = Sys.time()
2926 3038
   if (silent) {
2927 3039
     futile.logger::flog.threshold(futile.logger::WARN)
2928 3040
   }
... ...
@@ -2964,7 +3076,7 @@ filterGRNAndConnectGenes <- function(GRN,
2964 3076
     futile.logger::flog.info(paste0("Inital number of rows left before all filtering steps: ", nrow(grn.filt)))
2965 3077
     
2966 3078
     if (! "all" %in% TF_peak.connectionTypes) {
2967
-      checkmate::assertSubset(TF_peak.connectionTypes, .getAll_TF_peak_connectionTypes(GRN))
3079
+      checkmate::assertSubset(TF_peak.connectionTypes, .getAll_TF_peak_connectionTypes(GRN), empty.ok = FALSE)
2968 3080
       futile.logger::flog.info(paste0(" Filter network and retain only rows with one of the following TF-peak connection types: ", paste0(TF_peak.connectionTypes, collapse = ", ")))
2969 3081
       futile.logger::flog.info(paste0("  Number of TF-peak rows before filtering connection types: ", nrow(grn.filt)))
2970 3082
       grn.filt = dplyr::filter(grn.filt, .data$TF_peak.connectionType %in% TF_peak.connectionTypes)
... ...
@@ -3169,7 +3281,7 @@ filterGRNAndConnectGenes <- function(GRN,
3169 3281
       
3170 3282
       if (peak_gene.fdr.method == "IHW") {
3171 3283
         
3172
-        # Identify those entries for which both p-value and covairate are not NA
3284
+        # Identify those entries for which both p-value and covariate are not NA
3173 3285
         
3174 3286
         covariate_val = grn.filt %>% dplyr::pull(!!(peak_gene.IHW.covariate))
3175 3287
         indexes = which(!is.na(grn.filt$peak_gene.p_raw) & !is.na(covariate_val))
... ...
@@ -3267,14 +3379,22 @@ filterGRNAndConnectGenes <- function(GRN,
3267 3379
       .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
3268 3380
     }
3269 3381
     
3270
-    .printExecutionTime(start)
3382
+    
3271 3383
     
3272 3384
     
3273 3385
   } #end for all permutations
3274 3386
   
3387
+  if (!is.null(GRN@graph)) {
3388
+      futile.logger::flog.info(paste0("Reset the graph slot in the object. Rerun the method build_eGRN_graph."))
3389
+      GRN@graph = list()
3390
+  }
3391
+ 
3392
+  
3275 3393
   
3276 3394
   if (silent) futile.logger::flog.threshold(futile.logger::INFO)
3277 3395
   
3396
+  .printExecutionTime(start)
3397
+  
3278 3398
   GRN
3279 3399
   
3280 3400
 }
... ...
@@ -3294,7 +3414,7 @@ filterGRNAndConnectGenes <- function(GRN,
3294 3414
   checkmate::assert(checkmate::checkNumeric(covariates), checkmate::checkFactor(covariates))
3295 3415
   stopifnot(length(pvalues) == length(covariates))
3296 3416
   checkmate::assertNumber(alpha, lower = 0, upper = 1)
3297
-  checkmate::assertSubset(covariate_type, c("ordinal", "nominal"))
3417
+  checkmate::assertChoice(covariate_type, c("ordinal", "nominal"))
3298 3418
   checkmate::assert(checkmate::checkInt(nbins, lower = 1), checkmate::checkSubset(nbins, c("auto")))
3299 3419
   checkmate::assert(checkmate::checkNull(m_groups), checkmate::checkInteger(m_groups, len = length(covariates)))
3300 3420
   checkmate::assertLogical(quiet)
... ...
@@ -3303,9 +3423,9 @@ filterGRNAndConnectGenes <- function(GRN,
3303 3423
   checkmate::assertInt(nsplits_internal, lower = 1)
3304 3424
   checkmate::assert(checkmate::checkNumeric(lambdas), checkmate::checkSubset(lambdas, c("auto")))
3305 3425
   checkmate::assert(checkmate::checkNull(seed), checkmate::checkInteger(seed)) 
3306
-  checkmate::assertSubset(distrib_estimator, c("grenander", "ECDF"))
3307
-  checkmate::assertSubset(lp_solver, c("lpsymphony", "gurobi"))
3308
-  checkmate::assertSubset(adjustment_type, c("BH", "bonferroni"))
3426
+  checkmate::assertChoice(distrib_estimator, c("grenander", "ECDF"))
3427
+  checkmate::assertChoice(lp_solver, c("lpsymphony", "gurobi"))
3428
+  checkmate::assertChoice(adjustment_type, c("BH", "bonferroni"))
3309 3429
   checkmate::assertLogical(return_internal)
3310 3430
   checkmate::assertLogical(doDiagnostics)
3311 3431
   checkmate::assert(checkmate::checkNull(pdfFile), checkmate::checkDirectoryExists(dirname(pdfFile), access = "w"))
... ...
@@ -3448,14 +3568,26 @@ filterGRNAndConnectGenes <- function(GRN,
3448 3568
 #' @template addRobustRegression
3449 3569
 #' @template nCores
3450 3570
 #' @template forceRerun
3451
-#' @return  The same \code{\linkS4class{GRN}} object, with added data from this function.
3571
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function.
3452 3572
 #' @examples 
3453 3573
 #' # See the Workflow vignette on the GRaNIE website for examples
3454 3574
 #' GRN = loadExampleObject()
3455 3575
 #' GRN = add_TF_gene_correlation(GRN, forceRerun = FALSE)
3456 3576
 add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegression = FALSE, nCores = 1, forceRerun = FALSE) {
3457 3577
   
3578
+  start = Sys.time() 
3579
+  
3580
+  checkmate::assertClass(GRN, "GRN")  
3458 3581
   GRN = .addFunctionLogToObject(GRN)    
3582
+  checkmate::assertChoice(corMethod, c("pearson", "spearman"))
3583
+  assertFlag(addRobustRegression)
3584
+  checkmate::assertIntegerish(nCores, lower = 1)
3585
+  assertFlag(forceRerun)
3586
+  
3587
+  if (addRobustRegression) {
3588
+      packageMessage = paste0("The package robust is not installed, but needed here due to addRobustRegression = TRUE. Please install it and re-run this function or change addRobustRegression to FALSE.")
3589
+      .checkPackageInstallation("robust", packageMessage)  
3590
+  }
3459 3591
   
3460 3592
   if (is.null(GRN@connections$TF_genes.filtered) | forceRerun) {
3461 3593
     
... ...
@@ -3562,6 +3694,8 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress
3562 3694
     } # end for each permutation
3563 3695
   }
3564 3696
   
3697
+  .printExecutionTime(start, prefix = "")
3698
+  
3565 3699
   GRN
3566 3700
 }
3567 3701
 
... ...
@@ -3580,8 +3714,8 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe
3580 3714
   
3581 3715
   futile.logger::flog.info(paste0(" Checking validity of input"))
3582 3716
   checkmate::assertDataFrame(SNPData, min.rows = 1)
3583
-  checkmate::assertSubset(c(col_chr, col_pos), colnames(SNPData))
3584
-  checkmate::assertSubset(c(col_peakID), colnames(grn))
3717
+  checkmate::assertSubset(c(col_chr, col_pos), colnames(SNPData), empty.ok = FALSE)
3718
+  checkmate::assertSubset(c(col_peakID), colnames(grn), empty.ok = FALSE)
3585 3719
   
3586 3720
   if (genomeAssembly_peaks != genomeAssembly_SNP) {
3587 3721
     
... ...
@@ -3683,22 +3817,24 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe
3683 3817
 ####### STATS #########
3684 3818
 
3685 3819
 
3686
-#' Generate a summary PDF for the number of connections for a \code{\linkS4class{GRN}} object. 
3820
+#' Generate a summary for the number of connections for different filtering criteria for a \code{\linkS4class{GRN}} object. 
3687 3821
 #' 
3688
-#' Essentially, this functions calls \code{\link{filterGRNAndConnectGenes}} repeatedly and stores the total number of connections and other statistics each time to summarize them afterwards. All arguments are identical to the ones in \code{\link{filterGRNAndConnectGenes}}, see the help for this function for details.
3822
+#' This functions calls \code{\link{filterGRNAndConnectGenes}} repeatedly and stores the total number of connections and other statistics each time to summarize them afterwards. All arguments are identical to the ones in \code{\link{filterGRNAndConnectGenes}}, see the help for this function for details.
3823
+#' The function \code{\link{plot_stats_connectionSummary}} can be used afterwards for plotting.
3689 3824
 #' 
3690 3825
 #' @export
3691 3826
 #' @template GRN 
3692
-#' @param TF_peak.fdr Numeric vector. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. TF-peak FDR values to iterate over.
3827
+#' @param TF_peak.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. TF-peak FDR values to iterate over.
3693 3828
 #' @template TF_peak.connectionTypes
3694
-#' @param peak_gene.fdr Numeric vector. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. Peak-gene FDR values to iterate over.
3695
-#' @param peak_gene.p_raw  Numeric vector. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL.
3696
-#' @param peak_gene.r_range Numeric vector of length 2 (minimum -1, maximum 1). Default \code{c(0,1)}. The correlation range of peak-gene connections to keep.
3829
+#' @param peak_gene.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. Peak-gene FDR values to iterate over.
3830
+#' @param peak_gene.p_raw  Numeric vector[0,1]. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL.
3831
+#' @param peak_gene.r_range Numeric vector of length 2[-1,1]. Default \code{c(0,1)}. The correlation range of peak-gene connections to keep.
3697 3832
 #' @template gene.types
3698
-#' @param allowMissingGenes Logical vector. Default \code{c(FALSE, TRUE)}. Allow genes to be missing for peak-gene connections?
3699
-#' @param allowMissingTFs Logical vector. Default \code{c(FALSE)}. Allow TFs to be missing for TF-peak connections?
3833
+#' @param allowMissingGenes Logical vector of length 1 or 2. Default \code{c(FALSE, TRUE)}. Allow genes to be missing for peak-gene connections? If both \code{FALSE} and \code{TRUE} are given, the code loops over both
3834
+#' @param allowMissingTFs Logical vector of length 1 or 2. Default \code{c(FALSE)}. Allow TFs to be missing for TF-peak connections?  If both \code{FALSE} and \code{TRUE} are given, the code loops over both
3700 3835
 #' @template forceRerun
3701
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
3836
+#' @seealso \code{\link{plot_stats_connectionSummary}}
3837
+#' @return An updated \code{\linkS4class{GRN}} object, with additional information added from this function.
3702 3838
 #' @examples 
3703 3839
 #' # See the Workflow vignette on the GRaNIE website for examples
3704 3840
 #' GRN = loadExampleObject()
... ...
@@ -3710,21 +3846,23 @@ generateStatsSummary <- function(GRN,
3710 3846
                                  peak_gene.fdr = c(0.001, 0.01, 0.05, 0.1, 0.2),
3711 3847
                                  peak_gene.p_raw = NULL,
3712 3848
                                  peak_gene.r_range = c(0,1),
3713
-                                 gene.types = c("protein_coding", "lincRNA"),
3849
+                                 gene.types = c("protein_coding"),
3714 3850
                                  allowMissingGenes = c(FALSE, TRUE),
3715 3851
                                  allowMissingTFs = c(FALSE),
3716 3852
                                  forceRerun = FALSE
3717 3853
 ) {
3718 3854
   
3719
-  GRN = .addFunctionLogToObject(GRN)
3855
+  start = Sys.time()   
3720 3856
   
3721 3857
   checkmate::assertClass(GRN, "GRN")
3858
+  GRN = .addFunctionLogToObject(GRN)
3859
+
3722 3860
   checkmate::assertNumeric(TF_peak.fdr, lower = 0, upper = 1, min.len = 1)
3723
-  checkmate::assertSubset(TF_peak.connectionTypes, c("all", GRN@config$TF_peak_connectionTypes))
3861
+  checkmate::assertSubset(TF_peak.connectionTypes, c("all", GRN@config$TF_peak_connectionTypes), empty.ok = FALSE)
3724 3862
   checkmate::assertNumeric(peak_gene.fdr, lower = 0, upper = 1, min.len = 1)
3725 3863
   checkmate::assert(checkmate::checkNull(peak_gene.p_raw), checkmate::checkNumeric(peak_gene.p_raw, lower = 0, upper = 1, min.len = 1))
3726 3864
   checkmate::assertNumeric(peak_gene.r_range, lower = -1, upper = 1, len = 2)
3727
-  checkmate::assertCharacter(gene.types, min.len = 1)
3865
+  checkmate::assertSubset(gene.types, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
3728 3866
   checkmate::assertSubset(allowMissingGenes, c(TRUE, FALSE))
3729 3867
   checkmate::assertSubset(allowMissingTFs, c(TRUE, FALSE))
3730 3868
   
... ...
@@ -3859,7 +3997,7 @@ generateStatsSummary <- function(GRN,
3859 3997
     futile.logger::flog.info(paste0("Data already exists in object. Set forceRerun = TRUE to regenerate and overwrite."))
3860 3998
   }
3861 3999
   
3862
-  
4000
+  .printExecutionTime(start, prefix = "")
3863 4001
   
3864 4002
   GRN
3865 4003
   
... ...
@@ -3975,36 +4113,53 @@ generateStatsSummary <- function(GRN,
3975 4113
 
3976 4114
 #' Load example GRN dataset
3977 4115
 #' 
4116
+#' Loads an example GRN object with 6 TFs, ~61.000 peaks, ~19.000 genes, 259 filtered connections and pre-calculated enrichments from the internet. 
4117
+#' This function uses \code{BiocFileCache} if installed to cache the example object, which is 
4118
+#' considerably faster than re-downloading the file anew every time the function is executed.
4119
+#' If not, the file is re-downloaded every time anew. Thus, to enable caching, you may install the package \code{BiocFileCache}.
4120
+#' 
3978 4121
 #' @export
3979 4122
 #' @param forceDownload \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should the download be enforced even if the local cached file is already present?
3980 4123
 #' @param fileURL Character. Default \url{https://www.embl.de/download/zaugg/GRaNIE/GRN.rds}. URL to the GRN example object in rds format.
3981 4124
 #' @examples 
3982 4125
 #' GRN = loadExampleObject()
3983
-#' @return 
3984
-#' An example \code{\linkS4class{GRN}} object
3985
-#' @import BiocFileCache
4126
+#' @return An small example \code{\linkS4class{GRN}} object
3986 4127
 loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl.de/download/zaugg/GRaNIE/GRN.rds") {
3987 4128
   
3988 4129
   checkmate::assertFlag(forceDownload)
3989
-  # Taken and modified from https://www.bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html
3990
-  
3991
-  bfc <- .get_cache()
3992
-  
3993
-  rid <- BiocFileCache::bfcquery(bfc, "GRaNIE_object_example")$rid
3994
-  if (!length(rid)) {
3995
-    rid <- names(BiocFileCache::bfcadd(bfc, "GRaNIE_object_example", fileURL))
3996
-  }
3997
-  if (!isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)) | forceDownload) {
3998
-    messageStr = paste0("Downloading GRaNIE example object from ", fileURL)
3999
-    message(messageStr)
4000
-    filePath = BiocFileCache::bfcdownload(bfc, rid, ask = FALSE)
4130
+    
4131
+  if (!is.installed("BiocFileCache")) {
4132
+      
4133
+      message = paste0("The package BiocFileCache is not installed, but recommended if you want to speed-up the retrieval of the GRN example object ",
4134
+      "via this function when using it multiple times. If not installed, the example object has to be downloaded anew every time you use this function.")
4135
+      .checkPackageInstallation("BiocFileCache", message, isWarning = TRUE)
4136
+      
4137
+      GRN = readRDS(url(fileURL))
4138
+
4139
+  } else {
4140
+      
4141
+      
4142
+      # Taken and modified from https://www.bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html
4143
+      
4144
+      bfc <- .get_cache()
4145
+      
4146
+      rid <- BiocFileCache::bfcquery(bfc, "GRaNIE_object_example")$rid
4147
+      if (!length(rid)) {
4148
+          rid <- names(BiocFileCache::bfcadd(bfc, "GRaNIE_object_example", fileURL))
4149
+      }
4150
+      if (!isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)) | forceDownload) {
4151
+          messageStr = paste0("Downloading GRaNIE example object from ", fileURL)
4152
+          message(messageStr)
4153
+          filePath = BiocFileCache::bfcdownload(bfc, rid, ask = FALSE)
4154
+      }
4155
+      
4156
+      
4157
+      filePath = BiocFileCache::bfcrpath(bfc, rids = rid)
4158
+      
4159
+      # Now we can read in the locally stored file
4160
+      GRN = readRDS(filePath)
4001 4161
   }
4002
-  
4003
-  
4004
-  filePath = BiocFileCache::bfcrpath(bfc, rids = rid)
4005
-  
4006
-  # Now we can read in the locally stored file
4007
-  GRN = readRDS(filePath)
4162
+
4008 4163
   
4009 4164
   # Change the default path to the current working directory
4010 4165
   GRN@config$directories$outputRoot = "."
... ...
@@ -4012,6 +4167,9 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl
4012 4167
   GRN@config$directories$motifFolder = "."
4013 4168
   GRN@config$files$output_log = "GRN.log"
4014 4169
   
4170
+  # Temporary fix: Replace lincRNA with lncRNA due to a recent change in biomaRt until we update the object directly
4171
+  GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) 
4172
+  
4015 4173
   GRN
4016 4174
   
4017 4175
 }
... ...
@@ -4020,10 +4178,11 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl
4020 4178
 #' Get counts for the various data defined in a \code{\linkS4class{GRN}} object
4021 4179
 #' 
4022 4180
 #' Get counts for the various data defined in a \code{\linkS4class{GRN}} object.
4181
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
4023 4182
 #' 
4024 4183
 #' @template GRN 
4025 4184
 #' @param type Character. Either \code{peaks} or \code{rna}. \code{peaks} corresponds to the counts for the open chromatin data, while \code{rna} refers to th RNA-seq counts. If set to \code{rna}, both permuted and non-permuted data can be retrieved, while for \code{peaks}, only the non-permuted one (i.e., 0) can be retrieved.
4026
-#' @param norm Logical. \code{TRUE} or \code{FALSE}.  Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?
4185
+#' @param norm Logical. \code{TRUE} or \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?
4027 4186
 #' @template permuted
4028 4187
 #' @export
4029 4188
 #' @import tibble
... ...
@@ -4031,10 +4190,11 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl
4031 4190
 #' # See the Workflow vignette on the GRaNIE website for examples
4032 4191
 #' GRN = loadExampleObject()
4033 4192
 #' counts.df = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)
4034
-#' @return Data frame of counts, with the type as indicated by the function parameters.
4193
+#' @return Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4035 4194
 
4036 4195
 getCounts <- function(GRN, type, norm, permuted = FALSE) {
4037 4196
   
4197
+  checkmate::assertClass(GRN, "GRN")
4038 4198
   GRN = .addFunctionLogToObject(GRN)      
4039 4199
   
4040 4200
   checkmate::assertChoice(type, c("peaks", "rna"))
... ...
@@ -4083,7 +4243,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) {
4083 4243
   } else { # permutation 1
4084 4244
     
4085 4245
     if (type == "peaks") {
4086
-      message = "Could not find counts in object."
4246
+      message = "Could not find permuted peak counts in GRN object. Peaks are not stored as pemruted, set permuted = FALSE for type = \"peaks\""
4087 4247
       .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4088 4248
     } else {
4089 4249
       
... ...
@@ -4110,23 +4270,28 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) {
4110 4270
 
4111 4271
 
4112 4272
 
4113
-#' Extract connections from a \code{\linkS4class{GRN}} object
4273
+#' Extract connections or links from a \code{\linkS4class{GRN}} object as da data frame.
4274
+#' 
4275
+#' Returns stored connections/links (either TF-peak, peak-genes or the filtered set of connections as produced by \code{\link{filterGRNAndConnectGenes}}). 
4276
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
4114 4277
 #' 
4115 4278
 #' @export
4116 4279
 #' @template GRN 
4117 4280
 #' @template permuted
4118
-#' @param type Character. Default \code{all.filtered}. Must be one of \code{TF_peaks}, \code{peak_genes}, \code{all.filtered}. The type of connections to retrieve.
4119
-#' @param include_TF_gene_correlations Logical. \code{TRUE} or \code{FALSE}. Should TFs and gene correlations be returned as well? If set to \code{TRUE}, they must have been computed beforehand with \code{\link{add_TF_gene_correlation}}.
4120
-#' @return A data frame with the connections. Importantly, this function does **NOT** return a \code{\linkS4class{GRN}} object.
4281
+#' @param type Character. One of \code{TF_peaks}, \code{peak_genes}, \code{all.filtered}. Default \code{all.filtered}. The type of connections to retrieve.
4282
+#' @param include_TF_gene_correlations Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should TFs and gene correlations be returned as well? If set to \code{TRUE}, they must have been computed beforehand with \code{\link{add_TF_gene_correlation}}.
4283
+#' @return A data frame with the requested connections. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4284
+#' @seealso \code{\link{filterGRNAndConnectGenes}}
4121 4285
 #' @examples 
4122 4286
 #' # See the Workflow vignette on the GRaNIE website for examples
4123 4287
 #' GRN = loadExampleObject()
4124 4288
 #' GRN_con.all.df = getGRNConnections(GRN)
4125 4289
 getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE, include_TF_gene_correlations = FALSE) {
4126 4290
   
4291
+  checkmate::assertClass(GRN, "GRN")  
4127 4292
   GRN = .addFunctionLogToObject(GRN)
4128 4293
   
4129
-  checkmate::assertClass(GRN, "GRN")  
4294
+   
4130 4295
   checkmate::assertChoice(type, c("TF_peaks", "peak_genes", "all.filtered"))
4131 4296
   checkmate::assertFlag(permuted)
4132 4297
   #checkmate::assertIntegerish(permutation, lower = 0, upper = .getMaxPermutation(GRN))
... ...
@@ -4304,11 +4469,13 @@ getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE, inc
4304 4469
 
4305 4470
 #' Retrieve parameters for previously used function calls and general parameters for a \code{\linkS4class{GRN}} object. 
4306 4471
 #' 
4472
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
4473
+#' 
4307 4474
 #' @export
4308 4475
 #' @template GRN 
4309 4476
 #' @param name Character. Default \code{all}. Name of parameter or function name to retrieve. Set to the special keyword \code{all} to retrieve all parameters.
4310
-#' @param type Character. Default \code{parameter}. Either \code{function} or \code{parameter}. When set to \code{function}, a valid \code{GRaNIE} function name must be given that has been run before. When set to \code{parameter}, in combination with \code{name}, returns a specific parameter (as specified in \code{GRN@config})).
4311
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
4477
+#' @param type Character. Either \code{function} or \code{parameter}. Default \code{parameter}. When set to \code{function}, a valid \code{GRaNIE} function name must be given that has been run before. When set to \code{parameter}, in combination with \code{name}, returns a specific parameter (as specified in \code{GRN@config})).
4478
+#' @return The requested parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4312 4479
 #' @examples 
4313 4480
 #' # See the Workflow vignette on the GRaNIE website for examples
4314 4481
 #' GRN = loadExampleObject()
... ...
@@ -4316,14 +4483,14 @@ getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE, inc
4316 4483
 getParameters <- function (GRN, type = "parameter", name = "all") {
4317 4484
   
4318 4485
   checkmate::assertClass(GRN, "GRN")
4319
-  checkmate::assertSubset(type, c("function", "parameter"))
4486
+  checkmate::assertChoice(type, c("function", "parameter"))
4320 4487
   
4321 4488
   if (type == "function") {
4322 4489
     
4323 4490
     checkmate::assertCharacter(name, any.missing = FALSE, len = 1)
4324 4491
     functionParameters = GRN@config$functionParameters[[name]]
4325 4492
     if (is.null(functionParameters)) {
4326
-      checkmate::assertSubset(name, ls(paste0("package:", utils::packageName())))
4493
+      checkmate::assertChoice(name, ls(paste0("package:", utils::packageName())))
4327 4494
     } 
4328 4495
     
4329 4496
     return(functionParameters)
... ...
@@ -4335,7 +4502,7 @@ getParameters <- function (GRN, type = "parameter", name = "all") {
4335 4502
     } else {
4336 4503
       parameters = GRN@config[[name]]
4337 4504
       if (is.null(parameters)) {
4338
-        checkmate::assertSubset(name, names(GRN@config$parameters))
4505
+        checkmate::assertChoice(name, names(GRN@config$parameters))
4339 4506
       } 
4340 4507
       
4341 4508
       return(parameters)
... ...
@@ -4360,17 +4527,16 @@ getParameters <- function (GRN, type = "parameter", name = "all") {
4360 4527
 #' Optional convenience function to delete intermediate data from the function \code{\link{AR_classification_wrapper}} and summary statistics that may occupy a lot of space
4361 4528
 #' @export
4362 4529
 #' @template GRN
4363
-#' @return The same \code{\linkS4class{GRN}} object, with some slots being deleted (\code{GRN@data$TFs$classification} as well as \code{GRN@stats$connectionDetails.l})
4530
+#' @return An updated \code{\linkS4class{GRN}} object, with some slots being deleted (\code{GRN@data$TFs$classification} as well as \code{GRN@stats$connectionDetails.l})
4364 4531
 #' @examples 
4365 4532
 #' # See the Workflow vignette on the GRaNIE website for examples
4366
-#' # GRN = loadExampleObject()
4367
-#' # GRN = deleteIntermediateData(GRN)
4533
+#' GRN = loadExampleObject()
4534
+#' GRN = deleteIntermediateData(GRN)
4368 4535
 deleteIntermediateData <- function(GRN) {
4369 4536
   
4370
-  
4537
+  checkmate::assertClass(GRN, "GRN")
4371 4538
   GRN = .addFunctionLogToObject(GRN)
4372 4539
   
4373
-  checkmate::assertClass(GRN, "GRN")
4374 4540
   
4375 4541
   for (permutationCur in 0:.getMaxPermutation(GRN)) {
4376 4542
     
... ...
@@ -4400,7 +4566,9 @@ deleteIntermediateData <- function(GRN) {
4400 4566
 
4401 4567
 getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) {
4402 4568
   
4569
+  checkmate::assertClass(GRN, "GRN")
4403 4570
   GRN = .addFunctionLogToObject(GRN) 
4571
+  checkmate::assertFlag(forceRerun)
4404 4572
   
4405 4573
   # Get base mean expression for genes and TFs and mean accessibility from peaks
4406 4574
   
... ...
@@ -4438,7 +4606,7 @@ getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) {
4438 4606
 #' @export
4439 4607
 #' @template GRN
4440 4608
 #' @param outputDirectory Character. Default \code{.}. New output directory for all output files unless overwritten via the parameter \code{outputFolder}.
4441
-#' @return The same \code{\linkS4class{GRN}} object, with the output directory being adjusted accordingly
4609
+#' @return An updated \code{\linkS4class{GRN}} object, with the output directory being adjusted accordingly
4442 4610
 #' @examples 
4443 4611
 #' GRN = loadExampleObject()
4444 4612
 #' GRN = changeOutputDirectory(GRN, outputDirectory = ".")
... ...
@@ -124,12 +124,15 @@
124 124
   
125 125
   .printExecutionTime(start)
126 126
   
127
-  counts.norm %>% 
127
+  counts.norm = counts.norm %>% 
128 128
     as.data.frame()  %>% 
129 129
     tibble::as_tibble() %>% 
130 130
     dplyr::mutate({{idColumn}} := ids) %>%
131
-    dplyr::select({{idColumn}}, tidyselect::everything()) %>%
132
-    purrr::set_names(c(idColumn, colnames_samples))
131
+    dplyr::select({{idColumn}}, tidyselect::everything()) 
132
+  
133
+  colnames(counts.norm) = c(idColumn, colnames_samples)
134
+  
135
+  counts.norm
133 136
   
134 137
 }
135 138
 
... ...
@@ -63,9 +63,9 @@
63 63
 
64 64
 .startLogger <- function(logfile, level, removeOldLog = TRUE, appenderName = "consoleAndFile", verbose = TRUE) {
65 65
   
66
-  checkmate::assertSubset(level, c("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"))
66
+  checkmate::assertChoice(level, c("TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"))
67 67
   checkmate::assertFlag(removeOldLog)
68
-  checkmate::assertSubset(appenderName, c("console", "file", "consoleAndFile"))
68
+  checkmate::assertChoice(appenderName, c("console", "file", "consoleAndFile"))
69 69
   checkmate::assertFlag(verbose)
70 70
   
71 71
   if (appenderName != "console") {
... ...
@@ -105,13 +105,61 @@
105 105
 # PACKAGE LOADING AND DETACHING FUNCTIONS #
106 106
 ###########################################
107 107
 
108
-.checkOntologyPackageInstallation <- function(pkg) {
109
-    if (!is.installed(pkg)) {
110
-        message = paste0("The package ", pkg, " is not installed, which is however needed for the chosen ontology enrichment. Please install it and re-run this function or change the ontology.")
111
-        .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
108
+.checkPackageInstallation <- function(pkg, message, isWarning = FALSE) {
109
+    
110
+    pkgMissing = c()
111
+    for (packageCur in pkg) {
112
+        if (!is.installed(packageCur)) {
113
+            pkgMissing = c(pkgMissing, packageCur)
114
+        }
115
+    }
116
+    
117
+    if (length(pkgMissing) > 0) {
118
+        message = paste0(message, "\n\nExecute the following line in R to install the missing package(s): `BiocManager::install(c(\"", 
119
+                         paste0(pkgMissing, collapse = "\",\""), 
120
+                         "\"))`")
121
+        .checkAndLogWarningsAndErrors(NULL, message, isWarning = isWarning)
122
+    }
123
+   
124
+    
125
+}
126
+
127
+
128
+.checkAndLoadPackagesGenomeAssembly <- function(genomeAssembly) {
129
+    
130
+    baseMessage = paste0("For the chosen genome assembly version, particular genome annotation packages are required but not installed. Please install and re-run this function.")
131
+
132
+    if (genomeAssembly == "hg38") {
133
+        .checkPackageInstallation(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg38.knownGene", "BSgenome.Hsapiens.UCSC.hg38"), baseMessage)
134
+    } else if (genomeAssembly == "hg19") {
135
+        .checkPackageInstallation(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg19.knownGene", "BSgenome.Hsapiens.UCSC.hg19"), baseMessage)
136
+    } else if (genomeAssembly == "mm10") {
137
+        .checkPackageInstallation(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm10.knownGene", "BSgenome.Mmusculus.UCSC.mm10"), baseMessage)
138
+    } else if (genomeAssembly == "mm9") {
139
+        .checkPackageInstallation(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm9.knownGene", "BSgenome.Mmusculus.UCSC.mm9"), baseMessage)
112 140
     }
141
+
113 142
 }
114 143
 
144
+
145
+.checkPackage_topGO_and_arguments <- function (ontology, algorithm, statistic) {
146
+    
147
+    if (length(intersect(ontology, c("GO_BP", "GO_MF", "GO_CC"))) > 0) {
148
+        
149
+        packageMessage = paste0("The package topGO is not installed but required when selecting any of the three following ontologies: GO_BP, GO_MF, GO_CC. Please install it and re-run this function or change the ontology.")
150
+        .checkPackageInstallation("topGO", packageMessage) 
151
+        
152
+        # This function is calle in the topGO:::.onAttach function and needs to be executed once otherwise
153
+        # errors like object 'GOBPTerm' of mode 'environment' was not found are thrown
154
+        suppressMessages(topGO::groupGOTerms()) 
155
+        
156
+        checkmate::assertChoice(algorithm , topGO::whichAlgorithms())
157
+        checkmate::assertChoice(statistic , topGO::whichTests())
158
+    }
159
+}
160
+
161
+
162
+
115 163
 .clearOpenDevices <- function() {
116 164
   
117 165
   while (length(grDevices::dev.list()) > 0) {
... ...
@@ -212,26 +260,11 @@
212 260
   
213 261
   res.l = list()
214 262
   
215
-  # maxCores = tryCatch(
216
-  #   {
217
-  #      out = BiocParallel::multicoreWorkers() / 2
218
-  #    
219
-  #   },
220
-  #   error=function() {
221
-  # 
222
-  #     message = "Could not retrieve the available number of cores. There might be a problem with your installation of BiocParallel. Check whether BiocParallel::multicoreWorkers() returns an integer. Try executing the following line to fix the problem if a re-installation of BiocParallel does not work: library(parallel) and then options(mc.cores=4L), with 4 being the maximum number of cores available for the machine you run the pipeline on For now, the function will just run with 1 core."
223
-  #     .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
224
-  #     return(1)
225
-  #   }
226
-  # )    
227
-  # 
228
-  # 
229
-  # if (nCores > maxCores) {
230
-  #     nCores = max(1, floor(maxCores))
231
-  #     futile.logger::flog.warn(paste0(" Adjusted nCores down to ", nCores, " (only 50% of the cores are used at max)"))
232
-  # }
233
-  
263
+
234 264
   if (nCores > 1) {
265
+      
266
+    message = paste0("The package BiocParallel is not installed but required if more than 1 core should be used as requested. Please install it and re-run this function to speed-up the computation time or set the nCores parameter to 1 to disable parallel computation.")
267
+    .checkPackageInstallation("BiocParallel", message)
235 268
     
236 269
     res.l = tryCatch( {
237 270
         BiocParallel::bplapply(iteration, functionName, ..., BPPARAM = .initBiocParallel(nCores))
... ...
@@ -381,31 +414,18 @@
381 414
   gr
382 415
 }
383 416
 
384
-.checkAndLoadPackages <- function(packageList, verbose = FALSE) {
385
-    
386
-    for (packageCur in packageList) {
387
-        if (!is.installed(packageCur)) {
388
-            message = paste0("The suggested package \"", packageCur, "\" is not yet installed, but it is required for this function and in this context. Please install it and re-run this function.")
389
-            .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
390
-        }
391
-    }
392
-    
393
-}
417
+
394 418
 
395 419
 .getGenomeObject <- function(genomeAssembly, type = "txbd") {
396 420
     
397
-    
398
-    checkmate::assertSubset(type, c("txbd", "BSgenome", "packageName"))
399
-    checkmate::assertSubset(genomeAssembly, c("hg19","hg38", "mm9", "mm10"))
400
-    
421
+    checkmate::assertChoice(type, c("txbd", "BSgenome", "packageName"))
422
+    checkmate::assertChoice(genomeAssembly, c("hg19","hg38", "mm9", "mm10"))
401 423
     
402 424
     if (genomeAssembly == "hg38") {
403 425
 
404 426
         if (type == "txbd") {
405
-            .checkAndLoadPackages(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg38.knownGene"), verbose = FALSE) 
406 427
             obj <- TxDb.Hsapiens.UCSC.hg38.knownGene::TxDb.Hsapiens.UCSC.hg38.knownGene
407 428
         } else if (type == "BSgenome") {
408
-            .checkAndLoadPackages(c("BSgenome.Hsapiens.UCSC.hg38"), verbose = FALSE) 
409 429
             obj <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
410 430
         } else {
411 431
             obj = "org.Hs.eg.db"
... ...
@@ -414,10 +434,8 @@
414 434
     } else if (genomeAssembly == "hg19") {
415 435
         
416 436
         if (type == "txbd") {
417
-            .checkAndLoadPackages(c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg19.knownGene"), verbose = FALSE) 
418 437
             obj <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene
419 438
         } else if (type == "BSgenome") {
420
-            .checkAndLoadPackages(c("BSgenome.Hsapiens.UCSC.hg19"), verbose = FALSE) 
421 439
             obj <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19
422 440
         } else {
423 441
             obj = "org.Hs.eg.db"
... ...
@@ -427,10 +445,8 @@
427 445
     } else if (genomeAssembly == "mm10") {
428 446
         
429 447
         if (type == "txbd") { 
430
-            .checkAndLoadPackages(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm10.knownGene"), verbose = FALSE) 
431 448
             obj <- TxDb.Mmusculus.UCSC.mm10.knownGene::TxDb.Mmusculus.UCSC.mm10.knownGene
432 449
         } else if (type == "BSgenome") {
433
-            .checkAndLoadPackages(c("BSgenome.Mmusculus.UCSC.mm10"), verbose = FALSE) 
434 450
             obj <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
435 451
         } else {
436 452
             obj = "org.Mm.eg.db"
... ...
@@ -439,10 +455,8 @@
439 455
     } else if (genomeAssembly == "mm9") {
440 456
         
441 457
         if (type == "txbd") {
442
-            .checkAndLoadPackages(c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm9.knownGene"), verbose = FALSE) 
443 458
             obj <- TxDb.Mmusculus.UCSC.mm9.knownGene::TxDb.Mmusculus.UCSC.mm9.knownGene
444 459
         } else if (type == "BSgenome") {
445
-            .checkAndLoadPackages(c("BSgenome.Mmusculus.UCSC.mm9"), verbose = FALSE) 
446 460
             obj <- BSgenome.Mmusculus.UCSC.mm9::BSgenome.Mmusculus.UCSC.mm9
447 461
         } else {
448 462
             obj = "org.Mm.eg.db"
... ...
@@ -458,31 +472,6 @@
458 472
   GenomeInfoDb::seqlengths(txdb)
459 473
 }
460 474
 
461
-.checkAndLoadPackagesGenomeAssembly <- function(genomeAssembly) {
462
-    
463
-    checkmate::assertSubset(genomeAssembly, c("hg19","hg38", "mm9", "mm10"))
464
-    
465
-    if (genomeAssembly == "hg38") {
466
-        
467
-        packages = c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg38.knownGene")
468
-        
469
-    } else if (genomeAssembly == "hg19") {
470
-        
471
-        packages = c("org.Hs.eg.db", "TxDb.Hsapiens.UCSC.hg19.knownGene")
472
-        
473
-    } else if (genomeAssembly == "mm10") {
474
-        
475
-        packages = c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm10.knownGene")
476
-        
477
-    } else if (genomeAssembly == "mm9") {
478
-        
479
-        packages = c("org.Mm.eg.db", "TxDb.Mmusculus.UCSC.mm9.knownGene")
480
-        
481
-    }
482
-    
483
-    packages
484
-}
485
-
486 475
 .shuffleColumns <- function(df, nPermutations, returnUnshuffled = TRUE, returnAsList = TRUE, saveMemory = TRUE) {
487 476
     
488 477
     start = Sys.time()
... ...
@@ -552,7 +541,7 @@
552 541
     
553 542
 }
554 543
 
555
-
544
+#' @importFrom Matrix Matrix
556 545
 .asSparseMatrix <- function(matrix, convertNA_to_zero=TRUE, dimnames = NULL) {
557 546
   
558 547
   if (convertNA_to_zero) {
... ...
@@ -562,7 +551,7 @@
562 551
   Matrix::Matrix(matrix, sparse=TRUE, dimnames = dimnames)
563 552
 }
564 553
 
565
-
554
+#' @importFrom Matrix Matrix
566 555
 .asMatrixFromSparse <-function(matrix, convertZero_to_NA=TRUE) {
567 556
   
568 557
   if (methods::is(matrix,"dgeMatrix") | methods::is(matrix,"dgCMatrix") | methods::is(matrix,"dgRMatrix")) {
... ...
@@ -589,7 +578,7 @@
589 578
 
590 579
 .read_tidyverse_wrapper <- function(file, type = "tsv", ncolExpected = NULL, minRows = 0, verbose = TRUE, ...) {
591 580
   
592
-  checkmate::assertSubset(type, c("csv", "csv2", "tsv", "delim"))
581
+  checkmate::assertChoice(type, c("csv", "csv2", "tsv", "delim"))
593 582
   
594 583
   start = Sys.time()
595 584
   if (verbose) futile.logger::flog.info(paste0("Reading file ", file))
... ...
@@ -720,11 +709,8 @@ is.installed <- function(mypkg){
720 709
   }
721 710
 }
722 711
 
723
-
724 712
 .getBiomartParameters <- function(genomeAssembly) {
725 713
     
726
-    .checkOntologyPackageInstallation("biomaRt")
727
-    
728 714
     if (grepl(x = genomeAssembly, pattern = "^hg\\d\\d")){
729 715
         dataset = "hsapiens_gene_ensembl"
730 716
         if (genomeAssembly == "hg38") {
... ...
@@ -1,5 +1,7 @@
1 1
 ######## GRN graph functions ########
2 2
 #' Builds a graph out of a set of connections
3
+#' 
4
+#' This function requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}
3 5
 #'
4 6
 #' @template GRN
5 7
 #' @param model_TF_gene_nodes_separately \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Should TF and gene nodes be modeled separately? If set to \code{TRUE},this may lead to unwanted effects in case of TF-TF connections (i.e., a TF regulating another TF)
... ...
@@ -12,11 +14,13 @@
12 14
 #' # See the Workflow vignette on the GRaNIE website for examples
13 15
 #' GRN = loadExampleObject()
14 16
 #' GRN = build_eGRN_graph(GRN, forceRerun = FALSE)
15
-#' @return The same \code{\linkS4class{GRN}} object.
17
+#' @return An updated \code{\linkS4class{GRN}} object, with the graph(s) being stored in the slot `graph` (i.e., `GRN@graph` for both TF-gene and TF-peak-gene graphs)
16 18
 build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, 
17 19
                              allowLoops = FALSE, removeMultiple = FALSE, directed = FALSE, forceRerun = FALSE) {
18 20
   
21
+  start = Sys.time()  
19 22
   checkmate::assertClass(GRN, "GRN")
23
+  GRN = .addFunctionLogToObject(GRN)
20 24
   checkmate::assertFlag(model_TF_gene_nodes_separately)
21 25
   checkmate::assertFlag(allowLoops)
22 26
   checkmate::assertFlag(removeMultiple)
... ...
@@ -92,6 +96,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
92 96
     
93 97
   }
94 98
   
99
+  .printExecutionTime(start)
95 100
   
96 101
   GRN
97 102
   
... ...
@@ -189,21 +194,34 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
189 194
 }
190 195
 
191 196
 
192
-#' Perform all network-related statistical and descriptive analyses, including community and enrichment analyses. 
197
+#' Perform all network-related statistical and descriptive analyses, including community and enrichment analyses. See the functions it executes in the @seealso section below.
193 198
 #' 
194
-#' A convenience function that calls all network-related functions in one-go, using selected default parameters and a set of adjustable ones also. For full adjustment, run the individual functions separately.
199
+#' A convenience function that calls all network-related functions in one-go, using selected default parameters and a set of adjustable ones also. 
200
+#' For full adjustment, run the individual functions separately. 
201
+#' This function requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}
202
+
195 203
 #'
196 204
 #' @inheritParams calculateGeneralEnrichment
197 205
 #' @inheritParams plotCommunitiesStats
198 206
 #' @inheritParams plotCommunitiesEnrichment
199 207
 #' @inheritParams calculateCommunitiesStats
200 208
 #' @template maxWidth_nchar_plot
209
+#' @seealso \code{\link{build_eGRN_graph}}
210
+#' @seealso \code{\link{plotGeneralGraphStats}} 
211
+#' @seealso \code{\link{calculateGeneralEnrichment}} 
212
+#' @seealso \code{\link{plotGeneralEnrichment}} 
213
+#' @seealso \code{\link{calculateCommunitiesStats}} 
214
+#' @seealso \code{\link{plotCommunitiesStats}} 
215
+#' @seealso \code{\link{calculateCommunitiesEnrichment}} 
216
+#' @seealso \code{\link{plotCommunitiesEnrichment}} 
217
+#' @seealso \code{\link{calculateTFEnrichment}} 
218
+#' @seealso \code{\link{plotTFEnrichment}} 
201 219
 #' @export
202 220
 #' @examples 
203 221
 #' # See the Workflow vignette on the GRaNIE website for examples
204 222
 #' # GRN = loadExampleObject()
205 223
 #' # GRN = performAllNetworkAnalyses(GRN, outputFolder = ".", forceRerun = FALSE)
206
-#' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
224
+#' @return An updated \code{\linkS4class{GRN}} object, with added data from this function.
207 225
 performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), 
208 226
                                       algorithm = "weight01", statistic = "fisher",
209 227
                                       background = "neighborhood", 
... ...
@@ -259,7 +277,7 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"),
259 277
 # Retrieve set of background genes (as vector) used for enrichment analyses from a GRN object
260 278
 .getBackgroundGenes <-function(GRN, type = "neighborhood") {
261 279
   
262
-  checkmate::assertSubset(type, c("all_annotated", "all_RNA", "neighborhood"))
280
+  checkmate::assertChoice(type, c("all_annotated", "all_RNA", "neighborhood"), empty.ok = FALSE)
263 281
   
264 282
   if (type == "all_annotated") {
265 283
     
... ...
@@ -286,41 +304,56 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"),
286 304
 }
287 305
 
288 306
 
289
-#' Run an enrichment analysis for the genes in the filtered \code{\linkS4class{GRN}}
307
+#' Run an enrichment analysis for the genes in the whole network in the filtered \code{\linkS4class{GRN}} object
308
+#' 
309
+#' The enrichment analysis is based on the whole network, see \code{\link{calculateCommunitiesEnrichment}} and \code{\link{calculateTFEnrichment}} for 
310
+#' community- and TF-specific enrichment, respectively.
311
+#' This function requires the existence of the eGRN graph in the \code{\linkS4class{GRN}} object as produced by \code{\link{build_eGRN_graph}}.
312
+#' Results can subsequently be visualized with the function \code{\link{plotGeneralEnrichment}}.
313
+#' 
314
+#' All enrichment functions use the TF-gene graph as defined in the `GRN` object. See the `ontology` argument for currently supported ontologies.
315
+#' Also note that some parameter combinations for `algorithm` and `statistic` are incompatible, an error message will be thrown in such a case.
290 316
 #' 
291
-#' This function runs an enrichment analysis for the genes in the filtered network.
292 317
 #' @template GRN
293
-#' @param ontology Character vector of ontologies. Default \code{c("GO_BP", "GO_MF")}. Valid values are \code{"GO_BP"}, \code{"GO_MF"}, \code{"GO_CC"}, \code{"KEGG"}, \code{"DO"}, and \code{"Reactome"}, referring to GO Biological Process, GO Molecular Function, GO Cellular Component, KEGG, Disease Ontology, and Reactome Pathways.
318
+#' @param ontology Character vector of ontologies. Default \code{c("GO_BP", "GO_MF")}. 
319
+#' Valid values are \code{"GO_BP"}, \code{"GO_MF"}, \code{"GO_CC"}, \code{"KEGG"}, \code{"DO"}, and \code{"Reactome"}, 
320
+#' referring to \emph{GO Biological Process}, \emph{GO Molecular Function}, \emph{GO Cellular Component}, \emph{KEGG}, \emph{Disease Ontology}, 
321
+#' and \emph{Reactome Pathways}, respectively. \code{GO} ontologies require the \code{topGO}, 
322
+#' \code{"KEGG"} the \code{clusterProfiler}, \code{"DO"} the \code{DOSE}, and \code{"Reactome"} the \code{ReactomePA} packages, respectively.
323
+#' As they are listed under \code{Suggests}, they may not yet be installed, and the function will throw an error if they are missing.
294 324
 #' @param algorithm Character. Default \code{"weight01"}. One of: \code{"classic"}, \code{"elim"}, \code{"weight"}, \code{"weight01"}, \code{"lea"}, \code{"parentchild"}. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), ignored otherwise. Name of the algorithm that handles the GO graph structures. Valid inputs are those supported by the \code{topGO} library.
295 325
 #' @param statistic Character. Default \code{"fisher"}. One of: \code{"fisher"}, \code{"ks"}, \code{"t"}, \code{"globaltest"}, \code{"sum"}, \code{"ks.ties"}. Statistical test to be used. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), and valid inputs are those supported by the topGO library, ignored otherwise. For the other ontologies the test statistic is always Fisher. 
296 326
 #' @param background Character. Default \code{"neighborhood"}. One of: \code{"all_annotated"}, \code{"all_RNA"}, \code{"neighborhood"}. Set of genes to be used to construct the background for the enrichment analysis. This can either be all annotated genes in the reference genome (all_annotated), all differentially expressed genes (all_RNA), or all the genes that are within the neighborhood of a peak in the GRN (neighborhood)
297 327
 #' @param pAdjustMethod Character. Default \code{"BH"}. One of: \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, \code{"BY"}, \code{"fdr"}. This parameter is only relevant for the following ontologies: KEGG, DO, Reactome. For the other ontologies, the algorithm serves as an adjustment.
298 328
 #' @template forceRerun
299
-#' @return The same \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$general} slot.
329
+#' @return An updated \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$general} slot.
300 330
 #' @seealso \code{\link{plotGeneralEnrichment}}
301 331
 #' @seealso \code{\link{calculateCommunitiesEnrichment}}
332
+#' @seealso \code{\link{calculateTFEnrichment}}
302 333
 #' @seealso \code{\link{plotCommunitiesEnrichment}}
303 334
 #' @examples 
304 335
 #' # See the Workflow vignette on the GRaNIE website for examples
305 336
 #' GRN =  loadExampleObject()
306 337
 #' GRN =  calculateGeneralEnrichment(GRN, ontology = "GO_BP", forceRerun = FALSE)
307 338
 #' @export