Browse code

Code cleanup and careful check for potential issues

Christian Arnold authored on 14/09/2022 14:19:35
Showing22 changed files

... ...
@@ -88,5 +88,5 @@ License: Artistic-2.0
88 88
 LazyData: false
89 89
 URL: https://grp-zaugg.embl-community.io/GRaNIE
90 90
 BugReports: https://git.embl.de/grp-zaugg/GRaNIE/issues
91
-RoxygenNote: 7.2.0
91
+RoxygenNote: 7.2.1
92 92
 Config/testthat/edition: 3
... ...
@@ -2,16 +2,15 @@
2 2
 
3 3
 ## Major changes
4 4
 
5
-- many Documentation and R help updates, Package Details Vignette is online
5
+- many Documentation and R help updates, the [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation) is online
6 6
 - The workflow vignette is now improved: better figure resolution, figure aspect ratios are optimized, and a few other changes
7 7
 - the eGRN graph structure as built by `build_eGRN_graph()` in the `GRaNIE` object is now reset whenever the function `filterGRNAndConnectGenes()` is successfully executed to make sure that enrichment functions etc are not using an outdated graph structure. 
8 8
 - the landing page of the website has been extended and overhauled
9
-- removed some dependency packages and moved others into `Suggests` to lower the installation burden of the package. In addition, removed `topGO` from the `Depends` section (now in `Suggests`) and removed `tidyverse` altogether (before in `Depends`). Detailed explanations when and how the packages listed under `Suggests` are needed can now be found in the new [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation).
10
-- new helper function `installSuggestedPackages()` to install all packages listed under `Suggests`
9
+- removed some dependency packages and moved others into `Suggests` to lower the installation burden of the package. In addition, removed `topGO` from the `Depends` section (now in `Suggests`) and removed `tidyverse` altogether (before in `Depends`). Detailed explanations when and how the packages listed under `Suggests` are needed can now be found in the new [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation) and are clearly given to the user when executing the respective functions
11 10
 
12 11
 ## Minor changes
13 12
 
14
-- many small changes in the code, better argument checking, and preparing rigorous unit test inclusion
13
+- many small changes in the code, updated argument checking, and preparing rigorous unit test inclusion
15 14
 - internally renaming the (recently changed / renamed) gene type `lncRNA` from `biomaRt` to `lincRNA` to be compatible with older versions of `GRaNIE`
16 15
 
17 16
 
... ...
@@ -202,7 +202,7 @@ setMethod("show",
202 202
                 cat(" Communities (TF-gene):\n")
203 203
                 df = igraph::vertex.attributes(GRN@graph[["TF_gene"]]$graph)
204 204
                 if (!is.null(df) & "community" %in% names(df)) {
205
-                    communities = df %>% as.data.frame() %>% dplyr::count(community) %>% dplyr::arrange(dplyr::desc(.data$n))
205
+                    communities = df %>% as.data.frame() %>% dplyr::count(.data$community) %>% dplyr::arrange(dplyr::desc(.data$n))
206 206
                     cat("  Communities, sorted by size (n = Number of nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "")
207 207
                 } else {
208 208
                     cat("  None found\n")
... ...
@@ -265,7 +265,7 @@ nPeaks <- function(GRN, filter = TRUE) {
265 265
   }
266 266
   
267 267
   if (filter) {
268
-    nPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered) %>% nrow()
268
+    nPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered) %>% nrow()
269 269
   } else {
270 270
     nPeaks = GRN@data$peaks$consensusPeaks %>% nrow()
271 271
   }
... ...
@@ -301,7 +301,7 @@ nGenes <- function(GRN, filter = TRUE) {
301 301
   }
302 302
   
303 303
   if (filter) {
304
-    nGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% dplyr::filter(!isFiltered) %>% nrow()
304
+    nGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% dplyr::filter(!.data$isFiltered) %>% nrow()
305 305
   } else {
306 306
     nGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% nrow()
307 307
   }
... ...
@@ -226,7 +226,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
226 226
     if (nDuplicateRows > 0) {
227 227
       futile.logger::flog.warn(paste0(" Found ", nDuplicateRows, " duplicate rows in RNA-Seq data, consolidating them by summing them up."))
228 228
       counts_rna = counts_rna %>%
229
-        dplyr::group_by(ENSEMBL) %>%
229
+        dplyr::group_by(.data$ENSEMBL) %>%
230 230
         dplyr::summarise_if(is.numeric, sum) 
231 231
       # dplyr::summarise_if(is.numeric, sum, .groups = 'drop') # the .drop caused an error with dplyr 1.0.5
232 232
     }
... ...
@@ -243,8 +243,8 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
243 243
     GRN@config$TF_peak_connectionTypes = "expression"
244 244
     
245 245
     # Make sure ENSEMBL is the first column
246
-    countsRNA.norm.df = dplyr::select(countsRNA.norm.df, ENSEMBL, tidyselect::everything())
247
-    countsPeaks.norm.df = dplyr::select(countsPeaks.norm.df, peakID, tidyselect::everything())
246
+    countsRNA.norm.df = dplyr::select(countsRNA.norm.df, .data$ENSEMBL, tidyselect::everything())
247
+    countsPeaks.norm.df = dplyr::select(countsPeaks.norm.df, .data$peakID, tidyselect::everything())
248 248
     
249 249
     samples_rna  = colnames(countsRNA.norm.df)
250 250
     samples_peaks =  colnames(countsPeaks.norm.df)
... ...
@@ -262,7 +262,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
262 262
     if (!is.null(sampleMetadata)) {
263 263
       
264 264
       futile.logger::flog.info("Parsing provided metadata...")
265
-      GRN@data$metadata = sampleMetadata %>% dplyr::distinct() %>% tibble::set_tidy_names(syntactic = TRUE, quiet = TRUE)
265
+      GRN@data$metadata = sampleMetadata %>% dplyr::distinct() %>% tibble::tibble(.name_repair = "universal")
266 266
       
267 267
       # Force the first column to be the ID column
268 268
       if ("sampleID" %in% colnames(GRN@data$metadata)) {
... ...
@@ -288,12 +288,12 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
288 288
     }
289 289
     
290 290
     GRN@data$metadata =  GRN@data$metadata %>%
291
-      dplyr::mutate(has_RNA = sampleID  %in% samples_rna,
292
-                    has_peaks = sampleID %in% samples_peaks,
293
-                    has_both = has_RNA & has_peaks
291
+      dplyr::mutate(has_RNA = .data$sampleID  %in% samples_rna,
292
+                    has_peaks = .data$sampleID %in% samples_peaks,
293
+                    has_both = .data$has_RNA & .data$has_peaks
294 294
       )
295 295
     
296
-    GRN@config$sharedSamples = dplyr::filter(GRN@data$metadata, has_both) %>% dplyr::pull(sampleID) %>% as.character()
296
+    GRN@config$sharedSamples = dplyr::filter(GRN@data$metadata, .data$has_both) %>% dplyr::pull(.data$sampleID) %>% as.character()
297 297
     
298 298
     counts_peaks = as.data.frame(counts_peaks)
299 299
     counts_rna = as.data.frame(counts_rna)
... ...
@@ -306,7 +306,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
306 306
     
307 307
     if (isIntegerMatrix(counts_peaks[, GRN@config$sharedSamples])) {
308 308
       GRN@data$peaks$counts_orig = DESeq2::DESeqDataSetFromMatrix(counts_peaks[, GRN@config$sharedSamples], 
309
-                                                                  colData = dplyr::filter(GRN@data$metadata, has_both) %>% tibble::column_to_rownames("sampleID"),
309
+                                                                  colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"),
310 310
                                                                   design = ~1)
311 311
     } else {
312 312
       
... ...
@@ -315,7 +315,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
315 315
     
316 316
     if (isIntegerMatrix(counts_rna[, GRN@config$sharedSamples])) {
317 317
       GRN@data$RNA$counts_orig = DESeq2::DESeqDataSetFromMatrix(counts_rna[, GRN@config$sharedSamples], 
318
-                                                                colData = dplyr::filter(GRN@data$metadata, has_both) %>% tibble::column_to_rownames("sampleID"),
318
+                                                                colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"),
319 319
                                                                 design = ~1)   
320 320
     } else {
321 321
       GRN@data$RNA$counts_orig = as.matrix(counts_rna[, GRN@config$sharedSamples])
... ...
@@ -442,15 +442,15 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
442 442
     }
443 443
     
444 444
     geneAnnotation %>%
445
-        as_tibble() %>%
446
-        dplyr::filter(stringr::str_length(chromosome_name) <= 5) %>%
447
-        dplyr::mutate(chromosome_name = paste0("chr", chromosome_name)) %>%
448
-        dplyr::rename(gene.chr = chromosome_name, gene.start = start_position, gene.end = end_position, 
449
-                      gene.strand = strand, gene.ENSEMBL = ensembl_gene_id, gene.type = gene_biotype, gene.name = external_gene_name) %>%
445
+        tibble::as_tibble() %>%
446
+        dplyr::filter(stringr::str_length(.data$chromosome_name) <= 5) %>%
447
+        dplyr::mutate(chromosome_name = paste0("chr", .data$chromosome_name)) %>%
448
+        dplyr::rename(gene.chr = .data$chromosome_name, gene.start = .data$start_position, gene.end = .data$end_position, 
449
+                      gene.strand = .data$strand, gene.ENSEMBL = .data$ensembl_gene_id, gene.type = .data$gene_biotype, gene.name = .data$external_gene_name) %>%
450 450
         tidyr::replace_na(list(gene.type = "unknown")) %>%
451 451
         dplyr::mutate_if(is.character, as.factor) %>%
452
-        dplyr::mutate(gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) %>%  # there seems to be a name change from lincRNA -> lncRNA, lets change it here 
453
-        dplyr::mutate(gene.strand = factor(gene.strand, levels = c(1,-1), labels = c("+", "-")))
452
+        dplyr::mutate(gene.type = dplyr::recode_factor(.data$gene.type, lncRNA = "lincRNA")) %>%  # there seems to be a name change from lincRNA -> lncRNA, lets change it here 
453
+        dplyr::mutate(gene.strand = factor(.data$gene.strand, levels = c(1,-1), labels = c("+", "-")))
454 454
     
455 455
 }
456 456
 
... ...
@@ -473,7 +473,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
473 473
   
474 474
   if (!is.null(gene.types)) {
475 475
     if (! "all" %in% gene.types) {
476
-      genes.filt.df = dplyr::filter(genes.filt.df, gene.type %in% gene.types)
476
+      genes.filt.df = dplyr::filter(genes.filt.df, .data$gene.type %in% gene.types)
477 477
     }
478 478
   }
479 479
   
... ...
@@ -488,7 +488,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
488 488
   
489 489
   futile.logger::flog.info(paste0(" Calculate statistics for each peak (mean and CV)"))
490 490
   
491
-  countsPeaks.m = as.matrix(dplyr::select(countsPeaks.clean, -peakID))
491
+  countsPeaks.m = as.matrix(dplyr::select(countsPeaks.clean, -.data$peakID))
492 492
   
493 493
   rowMeans_peaks   = rowMeans(countsPeaks.m)
494 494
   rowMedians_peaks = matrixStats::rowMedians(countsPeaks.m)
... ...
@@ -509,7 +509,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
509 509
     
510 510
     futile.logger::flog.info(paste0(" Retrieve peak annotation using ChipSeeker. This may take a while"))
511 511
     genomeAssembly = GRN@config$parameters$genomeAssembly
512
-    consensusPeaks     = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered)
512
+    consensusPeaks     = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered)
513 513
     consensusPeaks.gr  = .constructGRanges(consensusPeaks, seqlengths = .getChrLengths(genomeAssembly), genomeAssembly)
514 514
     
515 515
     # Add ChIPSeeker anotation
... ...
@@ -538,22 +538,22 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
538 538
     
539 539
     GRN@annotation$consensusPeaks = dplyr::left_join(GRN@annotation$consensusPeaks, 
540 540
                                                      peaks.annotated.df  %>% 
541
-                                                       dplyr::select(peakID, annotation, tidyselect::starts_with("gene"), -geneId, distanceToTSS, ENSEMBL, SYMBOL, GENENAME) %>%
542
-                                                       dplyr::mutate(annotation  = as.factor(annotation), 
543
-                                                                     ENSEMBL = as.factor(ENSEMBL), 
544
-                                                                     GENENAME = as.factor(GENENAME),
545
-                                                                     SYMBOL = as.factor(SYMBOL)),
541
+                                                       dplyr::select(.data$peakID, .data$annotation, tidyselect::starts_with("gene"), -.data$geneId, .data$distanceToTSS, .data$ENSEMBL, .data$SYMBOL, .data$GENENAME) %>%
542
+                                                       dplyr::mutate(annotation  = as.factor(.data$annotation), 
543
+                                                                     ENSEMBL = as.factor(.data$ENSEMBL), 
544
+                                                                     GENENAME = as.factor(.data$GENENAME),
545
+                                                                     SYMBOL = as.factor(.data$SYMBOL)),
546 546
                                                      by = c("peak.ID" = "peakID")) %>%
547
-      dplyr::rename(peak.gene.chr = geneChr,
548
-                    peak.gene.start = geneStart, 
549
-                    peak.gene.end = geneEnd, 
550
-                    peak.gene.length = geneLength, 
551
-                    peak.gene.strand = geneStrand, 
552
-                    peak.gene.name = GENENAME,
553
-                    peak.gene.distanceToTSS = distanceToTSS,
554
-                    peak.gene.ENSEMBL = ENSEMBL,
555
-                    peak.gene.symbol = SYMBOL,
556
-                    peak.annotation = annotation
547
+      dplyr::rename(peak.gene.chr = .data$geneChr,
548
+                    peak.gene.start = .data$geneStart, 
549
+                    peak.gene.end = .data$geneEnd, 
550
+                    peak.gene.length = .data$geneLength, 
551
+                    peak.gene.strand = .data$geneStrand, 
552
+                    peak.gene.name = .data$GENENAME,
553
+                    peak.gene.distanceToTSS = .data$distanceToTSS,
554
+                    peak.gene.ENSEMBL = .data$ENSEMBL,
555
+                    peak.gene.symbol = .data$SYMBOL,
556
+                    peak.annotation = .data$annotation
557 557
       )
558 558
     
559 559
     
... ...
@@ -575,7 +575,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
575 575
   
576 576
   countsRNA.clean  = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE)
577 577
   
578
-  countsRNA.m = as.matrix(dplyr::select(countsRNA.clean, -ENSEMBL))
578
+  countsRNA.m = as.matrix(dplyr::select(countsRNA.clean, -.data$ENSEMBL))
579 579
   
580 580
   rowMeans_rna = rowMeans(countsRNA.m)
581 581
   rowMedians_rna = matrixStats::rowMedians(countsRNA.m)
... ...
@@ -588,7 +588,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
588 588
                                 gene.median = rowMedians_rna, 
589 589
                                 gene.CV = CV_rna) %>%
590 590
     dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>%
591
-    dplyr::mutate(gene.type = forcats::fct_explicit_na(gene.type, na_level = "unknown/missing"))
591
+    dplyr::mutate(gene.type = forcats::fct_explicit_na(.data$gene.type, na_level = "unknown/missing"))
592 592
   
593 593
   GRN@annotation$genes = metadata_rna
594 594
   
... ...
@@ -626,24 +626,24 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
626 626
     tibble::as_tibble() %>%
627 627
     dplyr::mutate(length = GenomicRanges::width(query),
628 628
                   peak.ID = query$peakID,
629
-                  GC_class = cut(`G|C`, breaks = seq(0,1,0.1), include.lowest = TRUE, ordered_result = TRUE))
629
+                  GC_class = cut(.data$`G|C`, breaks = seq(0,1,0.1), include.lowest = TRUE, ordered_result = TRUE))
630 630
   
631 631
   GC_classes.df = GC_content.df %>%
632
-    dplyr::group_by(GC_class) %>%
632
+    dplyr::group_by(.data$GC_class) %>%
633 633
     dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(length), peak_width_sd = sd(length)) %>%
634 634
     dplyr::ungroup() %>% 
635
-    tidyr::complete(GC_class, fill = list(n = 0)) %>%
635
+    tidyr::complete(.data$GC_class, fill = list(n = 0)) %>%
636 636
     dplyr::mutate(n_rel = .data$n / length(query))
637 637
   
638 638
   # TODO: Put where
639
-  #ggplot(GC_content.df, aes(GC.class)) + geom_histogram(stat = "count") + theme_bw()
639
+  #ggplot2::ggplot(GC_content.df, ggplot2::aes(GC.class)) + geom_histogram(stat = "count") + ggplot2::theme_bw()
640 640
   
641
-  #ggplot(GC_classes.df , aes(GC.class, n_rel)) + geom_bar(stat = "identity") + theme_bw()
641
+  #ggplot2::ggplot(GC_classes.df , ggplot2::aes(GC.class, n_rel)) + geom_bar(stat = "identity") + ggplot2::theme_bw()
642 642
   
643 643
   GRN@annotation$consensusPeaks = dplyr::left_join(GRN@annotation$consensusPeaks, GC_content.df, by = "peak.ID") %>%
644
-    dplyr::rename( peak.GC.perc    = `G|C`,
645
-                   peak.width      = length,
646
-                   peak.GC.class   = GC_class)
644
+    dplyr::rename( peak.GC.perc    = .data$`G|C`,
645
+                   peak.width      = .data$length,
646
+                   peak.GC.class   = .data$GC_class)
647 647
   
648 648
   GRN@stats$peaks = list()
649 649
   GRN@stats$peaks$GC = GC_classes.df
... ...
@@ -749,7 +749,7 @@ filterData <- function (GRN,
749 749
     futile.logger::flog.info(paste0(" Number of rows in total: ", nrow(GRN@data$RNA$counts_norm.l[["0"]])))
750 750
     for (permutationCur in c(0)) {
751 751
       permIndex = as.character(permutationCur)
752
-      rowMeans = rowMeans(dplyr::select(GRN@data$RNA$counts_norm.l[[permIndex]], -ENSEMBL))
752
+      rowMeans = rowMeans(dplyr::select(GRN@data$RNA$counts_norm.l[[permIndex]], -.data$ENSEMBL))
753 753
       GRN@data$RNA$counts_norm.l[[permIndex]]$isFiltered = rowMeans < minNormalizedMeanRNA 
754 754
     }
755 755
     nRowsFlagged = length(which(GRN@data$RNA$counts_norm.l[["0"]]$isFiltered))
... ...
@@ -816,12 +816,12 @@ filterData <- function (GRN,
816 816
   if (is.null(maxCV)) {
817 817
     futile.logger::flog.info(paste0("Filter peaks by CV: Min CV = ", minCV))
818 818
     
819
-    peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, peak.CV >= minCV)
819
+    peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, .data$peak.CV >= minCV)
820 820
     
821 821
   } else {
822 822
     futile.logger::flog.info(paste0("Filter peaks by CV: Min CV = ", minCV, ", max CV = ", maxCV))
823 823
     
824
-    peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, peak.CV >= minCV, peak.CV <= maxCV)
824
+    peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, .data$peak.CV >= minCV, .data$peak.CV <= maxCV)
825 825
   }
826 826
   
827 827
   futile.logger::flog.info(paste0(" Number of peaks after filtering : ", nrow(peaksFiltered)))
... ...
@@ -865,8 +865,8 @@ filterData <- function (GRN,
865 865
   
866 866
   
867 867
   peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, 
868
-                                peak.CV >= minCV, peak.CV <= maxCV, 
869
-                                peak.mean >= minMean, peak.mean <= maxMean)
868
+                                .data$peak.CV >= minCV, .data$peak.CV <= maxCV, 
869
+                                .data$peak.mean >= minMean, .data$peak.mean <= maxMean)
870 870
   
871 871
   futile.logger::flog.info(paste0(" Number of peaks after filtering : ", nrow(peaksFiltered)))
872 872
   
... ...
@@ -912,8 +912,8 @@ filterData <- function (GRN,
912 912
   
913 913
   
914 914
   genesFiltered = dplyr::filter(GRN@annotation$genes, 
915
-                                gene.CV >= minCV, gene.CV <= maxCV, 
916
-                                gene.mean >= minMean, gene.mean <= maxMean)
915
+                                .data$gene.CV >= minCV, .data$gene.CV <= maxCV, 
916
+                                .data$gene.mean >= minMean, .data$gene.mean <= maxMean)
917 917
   
918 918
   
919 919
   futile.logger::flog.info(paste0(" Number of genes after filtering : ", nrow(genesFiltered)))
... ...
@@ -962,7 +962,7 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte
962 962
     
963 963
     GRN@data$TFs$translationTable = GRN@data$TFs$translationTable %>%
964 964
       dplyr::select(c("HOCOID", "ENSEMBL")) %>%
965
-      dplyr::mutate(TF.name = HOCOID, TF.ENSEMBL = ENSEMBL) 
965
+      dplyr::mutate(TF.name = .data$HOCOID, TF.ENSEMBL = .data$ENSEMBL) 
966 966
     
967 967
     # TODO: Change here and make it more logical what to put where
968 968
     GRN@config$allTF = GRN@data$TFs$translationTable$TF.name
... ...
@@ -1013,14 +1013,16 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte
1013 1013
   file_input_HOCOMOCO = paste0(folder_input_TFBS, .Platform$file.sep, "translationTable.csv")
1014 1014
   HOCOMOCO_mapping.df = .readHOCOMOCOTable(file_input_HOCOMOCO, delim = " ")
1015 1015
   
1016
-  TF_notExpressed = sort(dplyr::filter(HOCOMOCO_mapping.df, ! ENSEMBL %in% countsRNA$ENSEMBL, HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(HOCOID))
1016
+  TF_notExpressed = sort(dplyr::filter(HOCOMOCO_mapping.df, ! .data$ENSEMBL %in% countsRNA$ENSEMBL, .data$HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(.data$HOCOID))
1017 1017
   
1018 1018
   if (length(TF_notExpressed) > 0) {
1019 1019
     futile.logger::flog.info(paste0("Filtering the following ", length(TF_notExpressed), " TFs as they are not present in the RNA-Seq data: ", paste0(TF_notExpressed, collapse = ",")))
1020 1020
     
1021 1021
   }
1022 1022
   
1023
-  allTF = sort(dplyr::filter(HOCOMOCO_mapping.df, ENSEMBL %in% countsRNA$ENSEMBL, HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(HOCOID))
1023
+  allTF = sort(dplyr::filter(HOCOMOCO_mapping.df, 
1024
+                             .data$ENSEMBL %in% countsRNA$ENSEMBL, 
1025
+                             .data$HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(.data$HOCOID))
1024 1026
   
1025 1027
   nTF = length(allTF)
1026 1028
   if (nTF == 0) {
... ...
@@ -1041,7 +1043,7 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte
1041 1043
   nTF = length(allTF)
1042 1044
   futile.logger::flog.info(paste0("Running the pipeline for ", nTF, " TF in total."))
1043 1045
   
1044
-  HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df, HOCOID %in% allTF)
1046
+  HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df, .data$HOCOID %in% allTF)
1045 1047
   if (nrow(HOCOMOCO_mapping.df.exp) == 0) {
1046 1048
     message = paste0("Number of rows of HOCOMOCO_mapping.df.exp is 0. Something is wrong with the mapping table or the filtering")
1047 1049
     .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
... ...
@@ -1125,13 +1127,13 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1125 1127
     }
1126 1128
     
1127 1129
     # new 
1128
-    filteredPeaks = dplyr::filter(GRN@data$peaks$counts_norm, isFiltered) %>% dplyr::pull(peakID)
1130
+    filteredPeaks = dplyr::filter(GRN@data$peaks$counts_norm, .data$isFiltered) %>% dplyr::pull(.data$peakID)
1129 1131
     # Collect binary 0/1 binding matrix from all TF and concatenate
1130 1132
     GRN@data$TFs$TF_peak_overlap = TFBS_bindingMatrix.df %>%
1131 1133
       dplyr::mutate(peakID = GRN@data$peaks$consensusPeaks$peakID,
1132
-                    isFiltered = peakID %in% filteredPeaks) %>%
1134
+                    isFiltered = .data$peakID %in% filteredPeaks) %>%
1133 1135
       dplyr::mutate_if(is.logical, as.numeric) %>%
1134
-      dplyr::select(tidyselect::all_of(sort(GRN@config$allTF)), isFiltered)
1136
+      dplyr::select(tidyselect::all_of(sort(GRN@config$allTF)), .data$isFiltered)
1135 1137
     
1136 1138
     GRN@data$TFs$TF_peak_overlap = .asSparseMatrix(as.matrix(GRN@data$TFs$TF_peak_overlap), 
1137 1139
                                                    convertNA_to_zero = FALSE, 
... ...
@@ -1186,13 +1188,13 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1186 1188
   query_overlap_df$peak_end    = end(query.gr)  [query_row_ids]
1187 1189
   
1188 1190
   final.df = cbind.data.frame(query_overlap_df, subject_overlap_df) %>%
1189
-    dplyr::select(-score, -annotation) %>%
1190
-    dplyr::mutate(tfbsID = paste0(tfbs_chr, ":", tfbs_start, "-", tfbs_end),
1191
-                  coordCentTfbs = round((tfbs_start + tfbs_end)/2, 0),
1192
-                  coordSummit   = round((peak_start + peak_end)/2, 0),
1193
-                  distance = abs(coordSummit - coordCentTfbs))  %>%
1194
-    dplyr::group_by(peakID) %>%
1195
-    dplyr::slice(which.min(distance)) %>%
1191
+    dplyr::select(-.data$score, -.data$annotation) %>%
1192
+    dplyr::mutate(tfbsID = paste0(.data$tfbs_chr, ":", .data$tfbs_start, "-", .data$tfbs_end),
1193
+                  coordCentTfbs = round((.data$tfbs_start + .data$tfbs_end)/2, 0),
1194
+                  coordSummit   = round((.data$peak_start + .data$peak_end)/2, 0),
1195
+                  distance = abs(.data$coordSummit - .data$coordCentTfbs))  %>%
1196
+    dplyr::group_by(.data$peakID) %>%
1197
+    dplyr::slice(which.min(.data$distance)) %>%
1196 1198
     #arrange(distance, .by_group = TRUE) %>%
1197 1199
     # top_n(n = 2, dplyr::desc(distance)) %>%
1198 1200
     dplyr::ungroup()
... ...
@@ -1236,10 +1238,10 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1236 1238
   # Filter to only the TFs
1237 1239
   # In addition, the no of TF because multiple TFs can map to the same gene/ ENSEMBL ID
1238 1240
   # Also filter 0 count genes because they otherwise cause errors downstream
1239
-  rowSums = rowSums(dplyr::select(matrix1, -ENSEMBL))
1241
+  rowSums = rowSums(dplyr::select(matrix1, -.data$ENSEMBL))
1240 1242
   
1241 1243
   # Keep only Ensembl IDs from TFs we have data from
1242
-  matrix1.norm.TFs.df = dplyr::filter(matrix1, ENSEMBL %in% HOCOMOCO_mapping$ENSEMBL, rowSums != 0)
1244
+  matrix1.norm.TFs.df = dplyr::filter(matrix1, .data$ENSEMBL %in% HOCOMOCO_mapping$ENSEMBL, rowSums != 0)
1243 1245
   
1244 1246
   diff = nrow(matrix1) - nrow(matrix1.norm.TFs.df)
1245 1247
   if (diff > 0) {
... ...
@@ -1252,7 +1254,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1252 1254
     .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
1253 1255
   }
1254 1256
   
1255
-  HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL)
1257
+  HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, .data$ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL)
1256 1258
   futile.logger::flog.info(paste0(whitespacePrefix, "Correlate TF/gene data for ", nrow(matrix1.norm.TFs.df), " unique Ensembl IDs (TFs) and peak counts for ", nrow(matrix_peaks), " peaks."))
1257 1259
   futile.logger::flog.info(paste0(whitespacePrefix, "Note: For subsequent steps, the same gene may be associated with multiple TF, depending on the translation table."))
1258 1260
   # Correlate TF gene counts with peak counts 
... ...
@@ -1263,7 +1265,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1263 1265
   # counts for peaks may be 0 throughout, then a warning is thrown
1264 1266
   
1265 1267
   #If the sd is zero, a warning is issued. We suppress it here to not confuse users as this is being dealt with later by ignoring the NA entries
1266
-  cor.m = suppressWarnings(t(cor(t(dplyr::select(matrix1.norm.TFs.df, -ENSEMBL)), t(dplyr::select(matrix_peaks, -peakID)), method = corMethod)))
1268
+  cor.m = suppressWarnings(t(cor(t(dplyr::select(matrix1.norm.TFs.df, -.data$ENSEMBL)), t(dplyr::select(matrix_peaks, -.data$peakID)), method = corMethod)))
1267 1269
   
1268 1270
   colnames(cor.m) = matrix1.norm.TFs.df$ENSEMBL
1269 1271
   rownames(cor.m) = matrix_peaks$peakID
... ...
@@ -1384,7 +1386,7 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac
1384 1386
       tibble::rownames_to_column("TF.name") %>%
1385 1387
       tibble::as_tibble() %>%
1386 1388
       dplyr::left_join(GRN@data$TFs$translationTable, by = "TF.name") %>%
1387
-      dplyr::select(ENSEMBL, TF.name, tidyselect::all_of(GRN@config$sharedSamples))
1389
+      dplyr::select(.data$ENSEMBL, .data$TF.name, tidyselect::all_of(GRN@config$sharedSamples))
1388 1390
     
1389 1391
     # Update available connection types
1390 1392
     GRN@config$TF_peak_connectionTypes = unique(c(GRN@config$TF_peak_connectionTypes, name))
... ...
@@ -1542,7 +1544,7 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF
1542 1544
     # data = dplyr::select(data, -tidyselect::one_of(nameColumn))
1543 1545
     
1544 1546
     # Only TF.names are unique
1545
-    countsNorm = .normalizeNewDataWrapper(data %>% dplyr::select(-ENSEMBL), normalization = normalization, idColumn = "TF.name")
1547
+    countsNorm = .normalizeNewDataWrapper(data %>% dplyr::select(-.data$ENSEMBL), normalization = normalization, idColumn = "TF.name")
1546 1548
     
1547 1549
     # Check overlap of ENSEMBL IDs
1548 1550
     countsNorm$ENSEMBL = data$ENSEMBL
... ...
@@ -1578,7 +1580,7 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF
1578 1580
     # Therefore, use TF names as row names, same as with the TF Activity matrix
1579 1581
     
1580 1582
     GRN@data$TFs[[name]] = countsNorm.subset %>%
1581
-      dplyr::select(ENSEMBL, TF.name, tidyselect::all_of(GRN@config$sharedSamples)) %>%
1583
+      dplyr::select(.data$ENSEMBL, .data$TF.name, tidyselect::all_of(GRN@config$sharedSamples)) %>%
1582 1584
       tibble::as_tibble()
1583 1585
     
1584 1586
     # Update available connection types
... ...
@@ -1636,7 +1638,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1636 1638
   outputFolder = .checkOutputFolder(GRN, outputFolder)
1637 1639
   
1638 1640
   GRN@data$TFs$classification$TF.translation.orig = GRN@data$TFs$translationTable %>%
1639
-    dplyr::mutate(TF.name = HOCOID)
1641
+    dplyr::mutate(TF.name = .data$HOCOID)
1640 1642
   
1641 1643
   if (is.null(GRN@data$TFs$TF_peak_overlap)) {
1642 1644
     message = paste0("Could not find peak - TF matrix. Run the function overlapPeaksAndTFBS first / again")
... ...
@@ -1683,7 +1685,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1683 1685
           
1684 1686
           # TF activity data
1685 1687
           counts1 = GRN@data$TFs[[connectionTypeCur]] %>% 
1686
-            dplyr::select(-TF.name)
1688
+            dplyr::select(-.data$TF.name)
1687 1689
           
1688 1690
         } 
1689 1691
         
... ...
@@ -1716,7 +1718,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1716 1718
         
1717 1719
         GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$TF.classification = 
1718 1720
           .finalizeClassificationAndAppend(
1719
-            output.global.TFs = GRN@data$TFs$classification$TF.translation.orig %>% dplyr::mutate(TF = TF.name), 
1721
+            output.global.TFs = GRN@data$TFs$classification$TF.translation.orig %>% dplyr::mutate(TF = .data$TF.name), 
1720 1722
             median.cor.tfs = GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$TF_cor_median_foreground, 
1721 1723
             act.rep.thres.l = GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$act.rep.thres.l, 
1722 1724
             par.l = GRN@config$parameters, 
... ...
@@ -1764,7 +1766,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05
1764 1766
           TF_peak_cor = GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$TF_peak_cor
1765 1767
           peak_TF_overlapCur.df = .filterSortAndShuffle_peakTF_overlapTable(GRN, permutationCur, TF_peak_cor)
1766 1768
           .plot_heatmapAR(TF.peakMatrix.df = peak_TF_overlapCur.df, 
1767
-                          HOCOMOCO_mapping.df.exp = GRN@data$TFs$translationTable %>% dplyr::mutate(TF = TF.name), 
1769
+                          HOCOMOCO_mapping.df.exp = GRN@data$TFs$translationTable %>% dplyr::mutate(TF = .data$TF.name), 
1768 1770
                           sort.cor.m = TF_peak_cor, 
1769 1771
                           par.l = GRN@config$parameters, 
1770 1772
                           corMethod = corMethod,
... ...
@@ -1968,7 +1970,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
1968 1970
       
1969 1971
       # Keep only Ensembl ID here
1970 1972
       counts_connectionTypeCur = GRN@data$TFs[[connectionTypeCur]] %>% 
1971
-        dplyr::select(-TF.name)
1973
+        dplyr::select(-.data$TF.name)
1972 1974
       
1973 1975
     } 
1974 1976
     
... ...
@@ -2040,18 +2042,18 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2040 2042
         
2041 2043
         # Get GC info from those peaks from the foreground
2042 2044
         GC_classes_foreground.df = peaksForeground %>%
2043
-          dplyr::group_by(GC_class) %>%
2044
-          dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(peak_width), peak_width_sd = sd(peak_width)) %>%
2045
+          dplyr::group_by(.data$GC_class) %>%
2046
+          dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(.data$peak_width), peak_width_sd = sd(.data$peak_width)) %>%
2045 2047
           dplyr::ungroup() %>% 
2046
-          tidyr::complete(GC_class, fill = list(n = 0)) %>%
2048
+          tidyr::complete(.data$GC_class, fill = list(n = 0)) %>%
2047 2049
           dplyr::mutate(n_rel = .data$n / nPeaksForeground, type = "foreground") %>%
2048
-          dplyr::arrange(dplyr::desc(n_rel))
2050
+          dplyr::arrange(dplyr::desc(.data$n_rel))
2049 2051
         
2050 2052
         GC_classes_background.df = peaksBackground %>%
2051
-          dplyr::group_by(GC_class) %>%
2052
-          dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(peak_width), peak_width_sd = sd(peak_width)) %>%
2053
+          dplyr::group_by(.data$GC_class) %>%
2054
+          dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(.data$peak_width), peak_width_sd = sd(.data$peak_width)) %>%
2053 2055
           dplyr::ungroup() %>% 
2054
-          tidyr::complete(GC_class, fill = list(n = 0)) %>%
2056
+          tidyr::complete(.data$GC_class, fill = list(n = 0)) %>%
2055 2057
           dplyr::mutate(n_rel = .data$n / nPeaksBackground, type = "background_orig")
2056 2058
         
2057 2059
         
... ...
@@ -2062,9 +2064,6 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2062 2064
           minPerc = percBackground_size
2063 2065
         } else {
2064 2066
           
2065
-          # Find a matching background of maximal size
2066
-          minPerc= min(GC_class.all$maxSizeBackground) / nPeaksBackground
2067
-          
2068 2067
           threshold_percentage = 0.05
2069 2068
           minPerc = .findMaxBackgroundSize(GC_classes_foreground.df, GC_classes_background.df, peaksBackground, threshold_percentage =  threshold_percentage)
2070 2069
           
... ...
@@ -2099,9 +2098,9 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2099 2098
         # Add sel. minPerc to table and calculate required frequencies
2100 2099
         
2101 2100
         GC_classes_all.df = dplyr::full_join(GC_classes_foreground.df, GC_classes_background.df, suffix = c(".fg",".bg"), by = "GC_class") %>%
2102
-          dplyr::mutate(maxSizeBackground = n.bg / n_rel.fg,
2103
-                        n.bg.needed = floor(n_rel.fg * targetNoPeaks), 
2104
-                        n.bg.needed.perc = n.bg / n.bg.needed) 
2101
+          dplyr::mutate(maxSizeBackground = .data$n.bg / .data$n_rel.fg,
2102
+                        n.bg.needed = floor(.data$n_rel.fg * targetNoPeaks), 
2103
+                        n.bg.needed.perc = .data$n.bg / .data$n.bg.needed) 
2105 2104
         
2106 2105
         
2107 2106
         #futile.logger::flog.info(paste0( " GC-adjustment: Randomly select a total of ", round(targetNoPeaks,0), 
... ...
@@ -2127,9 +2126,9 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2127 2126
           }
2128 2127
           
2129 2128
           if (GC_classes_all.df$n.bg.needed[i] > nrow(peaksBackgroundGCCur)) {
2130
-            peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = percBackground_resample) %>% dplyr::pull(peakID))  
2129
+            peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = percBackground_resample) %>% dplyr::pull(.data$peakID))  
2131 2130
           } else {
2132
-            peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = FALSE) %>% dplyr::pull(peakID))
2131
+            peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = FALSE) %>% dplyr::pull(.data$peakID))
2133 2132
           }
2134 2133
           # Take a sample from the background, and record the row numbers
2135 2134
           
... ...
@@ -2246,7 +2245,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2246 2245
           # 
2247 2246
           TF_peak.fdr_direction     = directionCur,
2248 2247
           TF_peak.r_bin      = 
2249
-            as.character(cut(TF_peak.r_bin2, breaks = stepsCur, right = rightOpen, include.lowest = TRUE))
2248
+            as.character(cut(.data$TF_peak.r_bin2, breaks = stepsCur, right = rightOpen, include.lowest = TRUE))
2250 2249
         ) 
2251 2250
         
2252 2251
         # Derive connection summaries for all TF for both directions
... ...
@@ -2262,7 +2261,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2262 2261
         }
2263 2262
         
2264 2263
         connectionStats_all.l[[indexStr]] =  cor.peak.tf %>%
2265
-          dplyr::group_by(TF_peak.r_bin) %>%
2264
+          dplyr::group_by(.data$TF_peak.r_bin) %>%
2266 2265
           dplyr::summarise(n = dplyr::n()) %>%
2267 2266
           dplyr::ungroup() %>%
2268 2267
           dplyr::right_join(fdr.curve, by = "TF_peak.r_bin") %>%
... ...
@@ -2270,7 +2269,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2270 2269
                         TF.name = as.factor(TFCur), 
2271 2270
                         TF_peak.connectionType = factor(connectionTypeCur, levels = connectionTypes),
2272 2271
                         TF_peak.fdr_direction  = factor(directionCur, levels = c("pos", "neg")),
2273
-                        TF_peak.r_bin = factor(TF_peak.r_bin, levels = levelsBins),
2272
+                        TF_peak.r_bin = factor(.data$TF_peak.r_bin, levels = levelsBins),
2274 2273
                         
2275 2274
                         # Collect extra information, currently however a bit repetitively stored
2276 2275
                         nForeground              = nPeaksForeground,
... ...
@@ -2278,15 +2277,15 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2278 2277
                         nBackground_orig         = nPeaksBackground,
2279 2278
                         percBackgroundUsed       = minPerc,
2280 2279
                         background_match_success = background_match_success) %>%
2281
-          dplyr::select(TF.name, TF_peak.r_bin, 
2282
-                        .data$n, tpvalue, fpvalue, fpvalue_norm, 
2283
-                        TF_peak.fdr, 
2284
-                        TF_peak.fdr_orig, TF_peak.fdr_direction, 
2285
-                        TF_peak.connectionType,
2280
+          dplyr::select(.data$TF.name, .data$TF_peak.r_bin, 
2281
+                        .data$n, .data$tpvalue, .data$fpvalue, .data$fpvalue_norm, 
2282
+                        .data$TF_peak.fdr, 
2283
+                        .data$TF_peak.fdr_orig, .data$TF_peak.fdr_direction, 
2284
+                        .data$TF_peak.connectionType,
2286 2285
                         tidyselect::contains("ground")) %>%
2287
-          dplyr::rename(n_tp = tpvalue, n_fp = fpvalue, n_fp_norm = fpvalue_norm) %>%
2286
+          dplyr::rename(n_tp = .data$tpvalue, n_fp = .data$fpvalue, n_fp_norm = .data$fpvalue_norm) %>%
2288 2287
           dplyr::distinct() %>%
2289
-          dplyr::arrange(TF_peak.r_bin)
2288
+          dplyr::arrange(.data$TF_peak.r_bin)
2290 2289
         
2291 2290
         
2292 2291
         # Collect data for additional QC plots before they are filtered
... ...
@@ -2498,7 +2497,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2498 2497
 
2499 2498
 }
2500 2499
 
2501
-.calculatePeakGeneOverlaps <- function(GRN, allPeaks, peak_TAD_mapping = NULL, par.l, neighborhoodSize = 250000, genomeAssembly, 
2500
+.calculatePeakGeneOverlaps <- function(GRN, allPeaks, peak_TAD_mapping = NULL, neighborhoodSize = 250000, genomeAssembly, 
2502 2501
                                        gene.types, overlapTypeGene = "TSS", removeEnsemblNA = TRUE) {
2503 2502
   
2504 2503
   start = Sys.time()
... ...
@@ -2514,10 +2513,10 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2514 2513
     
2515 2514
     futile.logger::flog.info(paste0(" For peaks overlapping multiple TADs, use the union of all to define the neighborhood"))
2516 2515
     peak_TAD_mapping = peak_TAD_mapping %>%
2517
-      dplyr::group_by(peakID) %>%
2518
-      dplyr::mutate(tadStart2 = min(tadStart), # If a peak overlaps multiple TADs, merge them
2519
-                    tadEnd2   = max(tadEnd),  # If a peak overlaps multiple TADs, merge them
2520
-                    tad.ID_all = paste0(tad.ID, collapse = "|")) %>%
2516
+      dplyr::group_by(.data$peakID) %>%
2517
+      dplyr::mutate(tadStart2 = min(.data$tadStart), # If a peak overlaps multiple TADs, merge them
2518
+                    tadEnd2   = max(.data$tadEnd),  # If a peak overlaps multiple TADs, merge them
2519
+                    tad.ID_all = paste0(.data$tad.ID, collapse = "|")) %>%
2521 2520
       dplyr::slice(1) %>%
2522 2521
       dplyr::ungroup()
2523 2522
     
... ...
@@ -2722,7 +2721,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2722 2721
     nPeaksWithOutTAD = length(which(is.na(peak.TADs.df$tad.ID)))
2723 2722
     futile.logger::flog.info(paste0(" Out of the ", nPeaks, " peaks, ", nPeaksWithOutTAD, " peaks are not within a TAD domain. These will be ignored for subsequent overlaps"))   
2724 2723
     
2725
-    nPeaksWithMultipleTADs = peak.TADs.df %>% dplyr::group_by(peakID) %>% dplyr::summarize(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow()
2724
+    nPeaksWithMultipleTADs = peak.TADs.df %>% dplyr::group_by(.data$peakID) %>% dplyr::summarize(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow()
2726 2725
     
2727 2726
     if (nPeaksWithMultipleTADs > 0) {
2728 2727
       futile.logger::flog.info(paste0(" Out of the ", nPeaks, " peaks, ", nPeaksWithMultipleTADs, " overlap with more than one TAD. This usually means they are crossing TAD borders.")) 
... ...
@@ -2736,12 +2735,14 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2736 2735
   # if a peak overlaps with a gene, should the same gene be reported as the connection?
2737 2736
   
2738 2737
   # OVERLAP OF PEAKS AND EXTENDED GENES
2739
-  overlaps.sub.df = .calculatePeakGeneOverlaps(GRN, allPeaks = consensusPeaks, peak.TADs.df, neighborhoodSize = neighborhoodSize, 
2740
-                                               genomeAssembly = genomeAssembly, gene.types = gene.types, overlapTypeGene = overlapTypeGene) 
2738
+  overlaps.sub.df = .calculatePeakGeneOverlaps(GRN, allPeaks = consensusPeaks, peak.TADs.df, 
2739
+                                               neighborhoodSize = neighborhoodSize, 
2740
+                                               genomeAssembly = genomeAssembly, 
2741
+                                               gene.types = gene.types, overlapTypeGene = overlapTypeGene) 
2741 2742
   
2742 2743
   
2743 2744
   overlaps.sub.filt.df = overlaps.sub.df %>%
2744
-    dplyr::mutate(gene.ENSEMBL = gsub("\\..+", "", gene.ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs
2745
+    dplyr::mutate(gene.ENSEMBL = gsub("\\..+", "", .data$gene.ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs
2745 2746
   
2746 2747
   # Only now we shuffle to make sure the background of possible connections is the same as in the foreground, as opposed to completely random
2747 2748
   # which would also include peak-gene connections that are not in the foreground at all
... ...
@@ -2762,8 +2763,8 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2762 2763
   
2763 2764
   permIndex = as.character(perm)
2764 2765
   
2765
-  countsPeaks.clean = dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -peakID)
2766
-  countsRNA.clean  = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm)), -ENSEMBL)
2766
+  countsPeaks.clean = dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -.data$peakID)
2767
+  countsRNA.clean  = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm)), -.data$ENSEMBL)
2767 2768
   
2768 2769
   # Cleverly construct the count matrices so we do the correlations in one go
2769 2770
   map_peaks = match(overlaps.sub.filt.df$peak.ID,  getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID)
... ...
@@ -2812,29 +2813,29 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2812 2813
   res.df = suppressMessages(tibble::as_tibble(res.m) %>%
2813 2814
                               dplyr::mutate(peak.ID = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID[map_peaks],
2814 2815
                                             gene.ENSEMBL = getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm))$ENSEMBL[map_rna]) %>%
2815
-                              dplyr::filter(!is.na(gene.ENSEMBL)) %>%  # For some peak-gene combinations, no RNA-Seq data was available, these NAs are filtered
2816
+                              dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>%  # For some peak-gene combinations, no RNA-Seq data was available, these NAs are filtered
2816 2817
                               # Add gene annotation and distance
2817 2818
                               dplyr::left_join(overlaps.sub.filt.df, by = c("gene.ENSEMBL", "peak.ID")) %>%
2818 2819
                               # Integrate TAD IDs also
2819
-                              dplyr::left_join(dplyr::select(peak.TADs.df, peak.ID, tad.ID), by = c("peak.ID")) %>%
2820
+                              dplyr::left_join(dplyr::select(peak.TADs.df, .data$peak.ID, .data$tad.ID), by = c("peak.ID")) %>%
2820 2821
                               
2821 2822
                               dplyr::select(tidyselect::all_of(selectColumns))) %>%
2822
-    dplyr::mutate(peak.ID = as.factor(peak.ID),
2823
-                  gene.ENSEMBL = as.factor(gene.ENSEMBL), 
2824
-                  tad.ID = as.factor(tad.ID)) %>%
2825
-    dplyr::rename(peak_gene.r = r, 
2826
-                  peak_gene.p_raw = p.raw)
2823
+    dplyr::mutate(peak.ID = as.factor(.data$peak.ID),
2824
+                  gene.ENSEMBL = as.factor(.data$gene.ENSEMBL), 
2825
+                  tad.ID = as.factor(.data$tad.ID)) %>%
2826
+    dplyr::rename(peak_gene.r = .data$r, 
2827
+                  peak_gene.p_raw = .data$p.raw)
2827 2828
   
2828 2829
   if (addRobustRegression) {
2829 2830
     res.df = dplyr::rename(res.df, 
2830
-                           peak_gene.p_raw.robust = p_raw.robust, 
2831
-                           peak_gene.r_robust = r_robust,
2832
-                           peak_gene.bias_M_p.raw = bias_M_p.raw,
2833
-                           peak_gene.bias_LS_p.raw = bias_LS_p.raw)
2831
+                           peak_gene.p_raw.robust = .data$p_raw.robust, 
2832
+                           peak_gene.r_robust = .data$r_robust,
2833
+                           peak_gene.bias_M_p.raw = .data$bias_M_p.raw,
2834
+                           peak_gene.bias_LS_p.raw = .data$bias_LS_p.raw)
2834 2835
   }
2835 2836
   
2836 2837
   if (is.null(TADs)) {
2837
-    res.df = dplyr::select(res.df, -tad.ID)
2838
+    res.df = dplyr::select(res.df, -.data$tad.ID)
2838 2839
   }
2839 2840
   
2840 2841
   futile.logger::flog.info(paste0(" Finished. Final number of rows: ", nrow(res.df)))
... ...
@@ -2905,17 +2906,17 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2905 2906
       dataCur.df = tibble::tibble(data_Peaks =unlist(counts1 [map1[i],]), 
2906 2907
                                   data_RNA = unlist(counts2 [map2[i],]) )
2907 2908
       
2908
-      # TODO: perm not known here
2909
+      # TODO: perm and GRN not known here
2909 2910
       stop("Not implemeneted yet")
2910
-      g = ggplot(dataCur.df, aes(data1, data2)) + geom_point() + geom_smooth(method ="lm") + 
2911
-        ggtitle(paste0(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID[map1[i]], 
2912
-                       ", ", 
2913
-                       getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm))$ENSEMBL[map2 [i]], 
2914
-                       ", p = ", round(res$p.value, 3))) + 
2915
-        theme_bw()
2916
-      plot(g)
2917
-      
2918
-      nPlotted = nPlotted + 1
2911
+      # g = ggplot2::ggplot(dataCur.df, ggplot2::aes(data1, data2)) + ggplot2::geom_point() + ggplot2::geom_smooth(method ="lm") + 
2912
+      #   ggplot2::ggtitle(paste0(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID[map1[i]], 
2913
+      #                  ", ", 
2914
+      #                  getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm))$ENSEMBL[map2 [i]], 
2915
+      #                  ", p = ", round(res$p.value, 3))) + 
2916
+      #   ggplot2::theme_bw()
2917
+      # plot(g)
2918
+      # 
2919
+      # nPlotted = nPlotted + 1
2919 2920
     }
2920 2921
     
2921 2922
     if (debugMode_nPlots > 0 & nPlotted >=  debugMode_nPlots){
... ...
@@ -3064,8 +3065,8 @@ filterGRNAndConnectGenes <- function(GRN,
3064 3065
     grn.filt = GRN@connections$TF_peaks[[permIndex]]$main  %>% 
3065 3066
       tibble::as_tibble() %>%
3066 3067
       dplyr::left_join(GRN@data$TFs$translationTable, by = c("TF.name")) %>%
3067
-      dplyr::select(-HOCOID, -ENSEMBL) %>%
3068
-      dplyr::mutate(TF.ENSEMBL = as.factor(TF.ENSEMBL))
3068
+      dplyr::select(-.data$HOCOID, -.data$ENSEMBL) %>%
3069
+      dplyr::mutate(TF.ENSEMBL = as.factor(.data$TF.ENSEMBL))
3069 3070
     
3070 3071
     if (is.null(grn.filt)) {
3071 3072
       message = "No TF-peak connections found. Run the function addConnections_TF_peak first"
... ...
@@ -3189,7 +3190,7 @@ filterGRNAndConnectGenes <- function(GRN,
3189 3190
       checkmate::assertIntegerish(peak_gene.maxDistance, lower = 0)
3190 3191
       futile.logger::flog.info(paste0(" Filter peak-gene connections for their distance and keep only connections with a maximum distance of  ", peak_gene.maxDistance))
3191 3192
       futile.logger::flog.info(paste0("  Number of peak-gene rows before filtering connection types: ", nrow(peakGeneCorrelations)))
3192
-      peakGeneCorrelations = dplyr::filter(peakGeneCorrelations, peak_gene.distance < peak_gene.maxDistance)
3193
+      peakGeneCorrelations = dplyr::filter(peakGeneCorrelations, .data$peak_gene.distance < peak_gene.maxDistance)
3193 3194
       futile.logger::flog.info(paste0("  Number of peak-gene rows after filtering connection types: ", nrow(peakGeneCorrelations)))
3194 3195
     }
3195 3196
     
... ...
@@ -3365,9 +3366,9 @@ filterGRNAndConnectGenes <- function(GRN,
3365 3366
                     dplyr::starts_with("peak_gene."),
3366 3367
                     dplyr::starts_with("gene."),
3367 3368
                     -dplyr::starts_with("peak.gene.")) %>% # these are the annotation columns by ChipSeeker that are confusing
3368
-      dplyr::mutate(peak.ID      = as.factor(peak.ID),
3369
-                    gene.ENSEMBL = as.factor(gene.ENSEMBL),
3370
-                    TF.name      = as.factor(TF.name))
3369
+      dplyr::mutate(peak.ID      = as.factor(.data$peak.ID),
3370
+                    gene.ENSEMBL = as.factor(.data$gene.ENSEMBL),
3371
+                    TF.name      = as.factor(.data$TF.name))
3371 3372
     
3372 3373
     
3373 3374
     GRN@connections$all.filtered[[permIndex]] = grn.filt
... ...
@@ -3482,27 +3483,27 @@ filterGRNAndConnectGenes <- function(GRN,
3482 3483
     # Plot No. 1
3483 3484
     res.l$ihwPlots$estimatedWeights  = 
3484 3485
       IHW::plot(ihw_res, what = "weights") + 
3485
-      ggtitle("Estimated weights")
3486
+      ggplot2::ggtitle("Estimated weights")
3486 3487
     
3487 3488
     # Plot No. 2
3488 3489
     res.l$ihwPlots$decisionBoundary  = 
3489 3490
       IHW::plot(ihw_res, what = "decisionboundary") + 
3490
-      ggtitle("Decision boundary")
3491
+      ggplot2::ggtitle("Decision boundary")
3491 3492
     
3492 3493
     # Plot No. 3
3493 3494
     res.l$ihwPlots$rawVsAdjPVal_all <- 
3494 3495
       as.data.frame(ihw_res) %>%
3495
-      ggplot(aes(x = .data$pvalue, y = adj_pvalue, col = group)) + 
3496
-      geom_point(size = 0.25) + scale_colour_hue(l = 70, c = 150, drop = FALSE) + 
3497
-      ggtitle("Raw versus adjusted p-values")
3496
+      ggplot2::ggplot(ggplot2::aes(x = .data$pvalue, y = .data$adj_pvalue, col = .data$group)) + 
3497
+      ggplot2::geom_point(size = 0.25) + ggplot2::scale_colour_hue(l = 70, c = 150, drop = FALSE) + 
3498
+      ggplot2::ggtitle("Raw versus adjusted p-values")
3498 3499
     
3499 3500
     # TODO why hard-coded
3500 3501
     pValThreshold = 0.2
3501 3502
     
3502 3503
     # Plot No. 4
3503 3504
     res.l$ihwPlots$rawVsAdjPVal_subset = 
3504
-      res.l$ihwPlots$rawVsAdjPVal_all %+% subset(IHW::as.data.frame(ihw_res), adj_pvalue <= pValThreshold) + 
3505
-      ggtitle("raw versus adjusted p-values (zoom)")
3505
+      res.l$ihwPlots$rawVsAdjPVal_all %+% subset(IHW::as.data.frame(ihw_res), .data$adj_pvalue <= pValThreshold) + 
3506
+      ggplot2::ggtitle("raw versus adjusted p-values (zoom)")
3506 3507
     
3507 3508
     ##                       ##
3508 3509
     ## 2. p-value histograms ##
... ...
@@ -3515,8 +3516,8 @@ filterGRNAndConnectGenes <- function(GRN,
3515 3516
     # Plot No. 5
3516 3517
     
3517 3518
     res.l$ihwPlots$pValHistogram = 
3518
-      ggplot(data.df, aes(x = pValues)) + geom_histogram(binwidth = 0.025, boundary = 0) + 
3519
-      ggtitle("p-Value histogram independent of the covariate")
3519
+      ggplot2::ggplot(data.df, ggplot2::aes(x = .data$pValues)) + ggplot2::geom_histogram(binwidth = 0.025, boundary = 0) + 
3520
+      ggplot2::ggtitle("p-Value histogram independent of the covariate")
3520 3521
     
3521 3522
     # Stratified p-value histograms by covariate
3522 3523
     # Plot No. 6
... ...
@@ -3525,14 +3526,14 @@ filterGRNAndConnectGenes <- function(GRN,
3525 3526
     
3526 3527
     data.df$covariate_group <- IHW::groups_by_filter(data.df$covariate, nGroups)
3527 3528
     res.l$ihwPlots$pValHistogramGroupedByCovariate = 
3528
-      ggplot(data.df, aes(x=pValues)) + geom_histogram(binwidth = 0.025, boundary = 0) + 
3529
-      facet_wrap( ~covariate_group, nrow = 2) + 
3530
-      ggtitle("p-Value histogram grouped by the covariate")
3529
+      ggplot2::ggplot(data.df, ggplot2::aes(x=.data$pValues)) + ggplot2::geom_histogram(binwidth = 0.025, boundary = 0) + 
3530
+        ggplot2::facet_wrap( ~covariate_group, nrow = 2) + 
3531
+      ggplot2::ggtitle("p-Value histogram grouped by the covariate")
3531 3532
     
3532 3533
     # Plot No. 7
3533 3534
     res.l$ihwPlots$pValHistogramGroupedByCovariateECDF = 
3534
-      ggplot(data.df, aes(x = pValues, col = covariate_group)) + stat_ecdf(geom = "step")  + 
3535
-      ggtitle("p-Value histogram grouped by the covariate (ECDF)")
3535
+      ggplot2::ggplot(data.df, ggplot2::aes(x = .data$pValues, col = .data$covariate_group)) + ggplot2::stat_ecdf(geom = "step")  + 
3536
+      ggplot2::ggtitle("p-Value histogram grouped by the covariate (ECDF)")
3536 3537
     
3537 3538
     
3538 3539
     # Check whether the covariate is informative about power under the alternative (property 1), 
... ...
@@ -3542,8 +3543,8 @@ filterGRNAndConnectGenes <- function(GRN,
3542 3543
     
3543 3544
     # Plot No. 8
3544 3545
     res.l$ihwPlots$pValAgainstRankCovariate = 
3545
-      ggplot(data.df, aes(x= covariateRank, y = -log10(pValues))) + geom_hex(bins = 100) + 
3546
-      ggtitle("p-Value against rank of the covariate")
3546
+      ggplot2::ggplot(data.df, ggplot2::aes(x= .data$covariateRank, y = -log10(.data$pValues))) + ggplot2::geom_hex(bins = 100) + 
3547
+      ggplot2::ggtitle("p-Value against rank of the covariate")
3547 3548
     
3548 3549
     
3549 3550
     if (!is.null(pdfFile)) {
... ...
@@ -3580,9 +3581,9 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress
3580 3581
   checkmate::assertClass(GRN, "GRN")  
3581 3582
   GRN = .addFunctionLogToObject(GRN)    
3582 3583
   checkmate::assertChoice(corMethod, c("pearson", "spearman"))
3583
-  assertFlag(addRobustRegression)
3584
+  checkmate::assertFlag(addRobustRegression)
3584 3585
   checkmate::assertIntegerish(nCores, lower = 1)
3585
-  assertFlag(forceRerun)
3586
+  checkmate::assertFlag(forceRerun)
3586 3587
   
3587 3588
   if (addRobustRegression) {
3588 3589
       packageMessage = paste0("The package robust is not installed, but needed here due to addRobustRegression = TRUE. Please install it and re-run this function or change addRobustRegression to FALSE.")
... ...
@@ -3607,8 +3608,8 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress
3607 3608
       
3608 3609
       permIndex = as.character(permutationCur)
3609 3610
       TF_genePairs = GRN@connections$all.filtered[[permIndex]] %>%
3610
-        dplyr::filter(!is.na(gene.ENSEMBL)) %>%
3611
-        dplyr::select(TF.name, gene.ENSEMBL) %>%
3611
+        dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>%
3612
+        dplyr::select(.data$TF.name, .data$gene.ENSEMBL) %>%
3612 3613
         dplyr::distinct() %>%
3613 3614
         dplyr::left_join(GRN@data$TFs$translationTable, by = c("TF.name"), suffix = c("", ".transl")) # %>%
3614 3615
       # dplyr::distinct(ENSEMBL, ENSEMBL.transl)
... ...
@@ -3619,7 +3620,7 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress
3619 3620
         futile.logger::flog.info(paste0("  Iterate through ", maxRow, " TF-gene combinations and (if possible) calculate correlations using ", nCores, " cores. This may take a few minutes."))
3620 3621
         
3621 3622
         
3622
-        countsRNA.clean  = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur)), -ENSEMBL)
3623
+        countsRNA.clean  = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur)), -.data$ENSEMBL)
3623 3624
         
3624 3625
         map_TF =   match(TF_genePairs$TF.ENSEMBL, getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur))$ENSEMBL)
3625 3626
         map_gene = match(TF_genePairs$gene.ENSEMBL, getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur))$ENSEMBL)
... ...
@@ -3664,7 +3665,7 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress
3664 3665
                         TF.name           = as.factor(.data$TF.name)) %>%
3665 3666
           dplyr::rename(TF_gene.r     = .data$r, 
3666 3667
                         TF_gene.p_raw = .data$p.raw) %>%
3667
-          dplyr::select(TF.name, TF.ENSEMBL, gene.ENSEMBL, tidyselect::everything())
3668
+          dplyr::select(.data$TF.name, .data$TF.ENSEMBL, .data$gene.ENSEMBL, tidyselect::everything())
3668 3669
         
3669 3670
         
3670 3671
         if (addRobustRegression) {
... ...
@@ -3770,7 +3771,7 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe
3770 3771
   query_overlap_df     = as.data.frame(S4Vectors::elementMetadata(peaks.gr)[query_row_ids, col_peakID])
3771 3772
   subject_overlap_df   = as.data.frame( SNPs.gr)[subject_row_ids, c("seqnames", "start")]
3772 3773
   
3773
-  overlaps.df = cbind.data.frame(query_overlap_df,subject_overlap_df) %>% dplyr::mutate(seqnames = as.character(seqnames))
3774
+  overlaps.df = cbind.data.frame(query_overlap_df,subject_overlap_df) %>% dplyr::mutate(seqnames = as.character(.data$seqnames))
3774 3775
   colnames(overlaps.df) = c("peakID", "SNP_chr", "SNP_start")
3775 3776
   
3776 3777
   if (addAllColumns) {
... ...
@@ -3785,13 +3786,13 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe
3785 3786
   
3786 3787
   colnamesNew = setdiff(colnames(overlaps.df), col_peakID)
3787 3788
   overlaps.summarized.df =  overlaps.df %>%
3788
-    dplyr::group_by(peak) %>%
3789
+    dplyr::group_by(.data$peak) %>%
3789 3790
     dplyr::summarise_at(colnamesNew, myfun) %>%
3790 3791
     dplyr::ungroup()
3791 3792
   
3792 3793
   # Add no of SNPs also as column
3793 3794
   overlaps.summarized.df = overlaps.summarized.df %>%
3794
-    dplyr::mutate(SNP_nOverlap = stringr::str_count(SNP_chr, stringr::fixed("|")) + 1)
3795
+    dplyr::mutate(SNP_nOverlap = stringr::str_count(.data$SNP_chr, stringr::fixed("|")) + 1)
3795 3796
   
3796 3797
   grn.SNPs = dplyr::left_join(grn, overlaps.summarized.df, by = col_peakID)
3797 3798
   
... ...
@@ -3827,7 +3828,7 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe
3827 3828
 #' @param TF_peak.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. TF-peak FDR values to iterate over.
3828 3829
 #' @template TF_peak.connectionTypes
3829 3830
 #' @param peak_gene.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. Peak-gene FDR values to iterate over.
3830
-#' @param peak_gene.p_raw  Numeric vector[0,1]. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL.
3831
+# #' @param peak_gene.p_raw  Numeric vector[0,1]. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL.
3831 3832
 #' @param peak_gene.r_range Numeric vector of length 2[-1,1]. Default \code{c(0,1)}. The correlation range of peak-gene connections to keep.
3832 3833
 #' @template gene.types
3833 3834
 #' @param allowMissingGenes Logical vector of length 1 or 2. Default \code{c(FALSE, TRUE)}. Allow genes to be missing for peak-gene connections? If both \code{FALSE} and \code{TRUE} are given, the code loops over both
... ...
@@ -3844,7 +3845,7 @@ generateStatsSummary <- function(GRN,
3844 3845
                                  TF_peak.fdr = c(0.001, 0.01, 0.05, 0.1, 0.2),
3845 3846
                                  TF_peak.connectionTypes = "all",
3846 3847
                                  peak_gene.fdr = c(0.001, 0.01, 0.05, 0.1, 0.2),
3847
-                                 peak_gene.p_raw = NULL,
3848
+#                                 peak_gene.p_raw = NULL,
3848 3849
                                  peak_gene.r_range = c(0,1),
3849 3850
                                  gene.types = c("protein_coding"),
3850 3851
                                  allowMissingGenes = c(FALSE, TRUE),
... ...
@@ -3860,12 +3861,11 @@ generateStatsSummary <- function(GRN,
3860 3861
   checkmate::assertNumeric(TF_peak.fdr, lower = 0, upper = 1, min.len = 1)
3861 3862
   checkmate::assertSubset(TF_peak.connectionTypes, c("all", GRN@config$TF_peak_connectionTypes), empty.ok = FALSE)
3862 3863
   checkmate::assertNumeric(peak_gene.fdr, lower = 0, upper = 1, min.len = 1)
3863
-  checkmate::assert(checkmate::checkNull(peak_gene.p_raw), checkmate::checkNumeric(peak_gene.p_raw, lower = 0, upper = 1, min.len = 1))
3864
+  #   checkmate::assert(checkmate::checkNull(peak_gene.p_raw), checkmate::checkNumeric(peak_gene.p_raw, lower = 0, upper = 1, min.len = 1))
3864 3865
   checkmate::assertNumeric(peak_gene.r_range, lower = -1, upper = 1, len = 2)
3865 3866
   checkmate::assertSubset(gene.types, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
3866 3867
   checkmate::assertSubset(allowMissingGenes, c(TRUE, FALSE))
3867 3868
   checkmate::assertSubset(allowMissingTFs, c(TRUE, FALSE))
3868
-  
3869 3869
   checkmate::assertFlag(forceRerun)
3870 3870
   
3871 3871
   if (is.null(GRN@stats$connections) | is.null(GRN@stats$connectionDetails.l) | forceRerun) {
... ...
@@ -3959,35 +3959,40 @@ generateStatsSummary <- function(GRN,
3959 3959
         } # end of for (peak_gene.fdr_cur in peak_gene.fdr)
3960 3960
         
3961 3961
         
3962
-        if (!is.null(peak_gene.p_raw)) {
3963
-          
3964
-          futile.logger::flog.info(paste0(" Iterating over different peak-gene raw p-value thresholds..."))
3965
-          for (peak_gene.p_raw_cur in peak_gene.p_raw) {
3966
-            
3967
-            futile.logger::flog.info(paste0("  Peak-gene raw p-value = ", peak_gene.p_raw_cur))
3968
-            
3969
-            # TODO: Add the other ones here also
3970
-            for (allowMissingGenesCur in allowMissingGenes) {
3971
-              
3972
-              GRN = filterGRNAndConnectGenes(GRN, 
3973
-                                             TF_peak.fdr.threshold = TF_peak.fdr_cur, peak_gene.p_raw.threshold = peak_gene.p_raw_cur, 
3974
-                                             peak_gene.fdr.threshold= NULL,
3975
-                                             gene.types = gene.types, 
3976
-                                             allowMissingGenes = allowMissingGenesCur, peak_gene.r_range = peak_gene.r_range,
3977
-                                             filterTFs = NULL, filterGenes = NULL, filterPeaks = NULL,
3978
-                                             silent = TRUE)
3979
-              
3980
-              
3981
-              GRN@stats$connections = .addStats(GRN@stats$connections, GRN@connections$all.filtered[[permIndex]], 
3982
-                                                perm = permutationCur, TF_peak.fdr = TF_peak.fdr_cur, peak_gene.fdr = NA, 
3983
-                                                allowMissingGenes = allowMissingGenesCur, peak_gene.p_raw = peak_gene.p_raw_cur,
3984
-                                                peak_gene.r_range = paste0(peak_gene.r_range, collapse = ","),
3985
-                                                gene.types = paste0(gene.types, collapse = ","))
3986
-              
3987
-            } # end of for (allowMissingGenesCur in allowMissingGenes)
3988
-            
3989
-          } # end of   for (peak_gene.p_raw_cur in peak_gene.p_raw) 
3990
-        } # end of  if (!is.null(peak_gene.p_raw))
3962
+        # REMOVED FOR NOW, WAS INCOMPLETE AND NOT NEEDED ANYWAY
3963
+        # if (!is.null(peak_gene.p_raw)) {
3964
+        #   
3965
+        #   futile.logger::flog.info(paste0(" Iterating over different peak-gene raw p-value thresholds..."))
3966
+        #   for (peak_gene.p_raw_cur in peak_gene.p_raw) {
3967
+        #     
3968
+        #     futile.logger::flog.info(paste0("  Peak-gene raw p-value = ", peak_gene.p_raw_cur))
3969
+        #     
3970
+        #     # TODO: Add the other ones here also
3971
+        #     for (allowMissingGenesCur in allowMissingGenes) {
3972
+        #       
3973
+        #       GRN = filterGRNAndConnectGenes(GRN, 
3974
+        #                                      TF_peak.fdr.threshold = TF_peak.fdr_cur, peak_gene.p_raw.threshold = peak_gene.p_raw_cur, 
3975
+        #                                      peak_gene.fdr.threshold= NULL,
3976
+        #                                      gene.types = gene.types, 
3977
+        #                                      allowMissingGenes = allowMissingGenesCur, peak_gene.r_range = peak_gene.r_range,
3978
+        #                                      filterTFs = NULL, filterGenes = NULL, filterPeaks = NULL,
3979
+        #                                      silent = TRUE)
3980
+        #       
3981
+        #       
3982
+        #       GRN@stats$connections = .addStats(GRN@stats$connections, GRN@connections$all.filtered[[permIndex]], 
3983
+        #                                         perm = permutationCur, 
3984
+        #                                         TF_peak.fdr = TF_peak.fdr_cur, TF_peak.connectionType = TF_peak.connectionTypeCur,
3985
+        #                                         peak_gene.fdr = NA, 
3986
+        #                                         peak_gene.p_raw = peak_gene.p_raw_cur,
3987
+        #                                         peak_gene.r_range = paste0(peak_gene.r_range, collapse = ","),
3988
+        #                                         gene.types = paste0(gene.types, collapse = ","),
3989
+        #                                         allowMissingGenes = allowMissingGenesCur,
3990
+        #                                         allowMissingTFs   = allowMissingTFsCur)
3991
+        #       
3992
+        #     } # end of for (allowMissingGenesCur in allowMissingGenes)
3993
+        #     
3994
+        #   } # end of   for (peak_gene.p_raw_cur in peak_gene.p_raw) 
3995
+        # } # end of  if (!is.null(peak_gene.p_raw))
3991 3996
         
3992 3997
         
3993 3998
       }
... ...
@@ -4010,11 +4015,11 @@ generateStatsSummary <- function(GRN,
4010 4015
                       gene.types,
4011 4016
                       allowMissingGenes, allowMissingTFs) {
4012 4017
   
4013
-  TF.stats   = dplyr::select(connections.df, TF.name, peak.ID)   %>% dplyr::filter(!is.na(peak.ID)) %>% dplyr::pull(TF.name)   %>% as.character() %>% table() 
4014
-  gene.stats = dplyr::select(connections.df, peak.ID, gene.ENSEMBL) %>% dplyr::filter(!is.na(gene.ENSEMBL)) %>% dplyr::pull(gene.ENSEMBL) %>% as.character() %>% table() 
4018
+  TF.stats   = dplyr::select(connections.df, .data$TF.name, .data$peak.ID)   %>% dplyr::filter(!is.na(.data$peak.ID)) %>% dplyr::pull(.data$TF.name)   %>% as.character() %>% table() 
4019
+  gene.stats = dplyr::select(connections.df, .data$peak.ID, .data$gene.ENSEMBL) %>% dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% dplyr::pull(.data$gene.ENSEMBL) %>% as.character() %>% table() 
4015 4020
   
4016
-  peak_gene.stats = dplyr::select(connections.df, peak.ID, gene.ENSEMBL) %>% dplyr::filter(!is.na(gene.ENSEMBL),!is.na(peak.ID)) %>% dplyr::pull(peak.ID) %>% as.character() %>% table() 
4017
-  peak.TF.stats   = dplyr::select(connections.df, peak.ID, TF.name)      %>% dplyr::filter(!is.na(TF.name),     !is.na(peak.ID)) %>% dplyr::pull(peak.ID) %>% as.character() %>% table() 
4021
+  peak_gene.stats = dplyr::select(connections.df, .data$peak.ID, .data$gene.ENSEMBL) %>% dplyr::filter(!is.na(.data$gene.ENSEMBL),!is.na(.data$peak.ID)) %>% dplyr::pull(.data$peak.ID) %>% as.character() %>% table() 
4022
+  peak.TF.stats   = dplyr::select(connections.df, .data$peak.ID, .data$TF.name)      %>% dplyr::filter(!is.na(.data$TF.name),     !is.na(.data$peak.ID)) %>% dplyr::pull(.data$peak.ID) %>% as.character() %>% table() 
4018 4023
   
4019 4024
   if (length(TF.stats) > 0){
4020 4025
     TF.connections = c(min(TF.stats, na.rm = TRUE), 
... ...
@@ -4168,7 +4173,7 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl
4168 4173
   GRN@config$files$output_log = "GRN.log"
4169 4174
   
4170 4175
   # Temporary fix: Replace lincRNA with lncRNA due to a recent change in biomaRt until we update the object directly
4171
-  GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) 
4176
+  GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(.data$gene.type, lncRNA = "lincRNA")) 
4172 4177
   
4173 4178
   GRN
4174 4179
   
... ...
@@ -4215,7 +4220,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) {
4215 4220
         if (checkmate::testClass(countsOrig, "DESeqDataSet")) {
4216 4221
           result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("peakID")
4217 4222
         } else {
4218
-          result = countsOrig %>% dplyr::select(-isFiltered) %>% as.data.frame()
4223
+          result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame()
4219 4224
         }
4220 4225
         
4221 4226
       }
... ...
@@ -4233,7 +4238,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) {
4233 4238
         if (checkmate::testClass(countsOrig, "DESeqDataSet")) {
4234 4239
           result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("ENSEMBL")
4235 4240
         } else {
4236
-          result = countsOrig %>% dplyr::select(-isFiltered) %>% as.data.frame()
4241
+          result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame()
4237 4242
           # TODO: isFiltered column?
4238 4243
         }
4239 4244
         
... ...
@@ -4252,7 +4257,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) {
4252 4257
         
4253 4258
         # Columns are shuffled so that non-matching samples are compared throughout the pipeline for all correlation-based analyses
4254 4259
         result = GRN@data$RNA$counts_norm.l[["0"]][shuffledColumnOrder] %>%
4255
-          dplyr::select(ENSEMBL, tidyselect::everything())
4260
+          dplyr::select(.data$ENSEMBL, tidyselect::everything())
4256 4261
         
4257 4262
       } else {
4258 4263
         message= "Could not find counts in object."
... ...
@@ -4262,7 +4267,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) {
4262 4267
   }
4263 4268
   
4264 4269
   if ("isFiltered" %in% colnames(result)) {
4265
-    result = dplyr::filter(result, !isFiltered) %>% dplyr::select(-isFiltered)
4270
+    result = dplyr::filter(result, !.data$isFiltered) %>% dplyr::select(-.data$isFiltered)
4266 4271
   }
4267 4272
   
4268 4273
   result
... ...
@@ -4388,11 +4393,11 @@ getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE, inc
4388 4393
   } 
4389 4394
   
4390 4395
   df  %>%
4391
-    dplyr::mutate(TF.name = as.factor(TF.name ),
4392
-                  TF_peak.r_bin = as.factor(TF_peak.r_bin),
4393
-                  peak.ID = as.factor(peak.ID),
4394
-                  TF_peak.fdr_direction = as.factor(TF_peak.fdr_direction),
4395
-                  TF_peak.connectionType = as.factor(TF_peak.connectionType))
4396
+    dplyr::mutate(TF.name = as.factor(.data$TF.name ),
4397
+                  TF_peak.r_bin = as.factor(.data$TF_peak.r_bin),
4398
+                  peak.ID = as.factor(.data$peak.ID),
4399
+                  TF_peak.fdr_direction = as.factor(.data$TF_peak.fdr_direction),
4400
+                  TF_peak.connectionType = as.factor(.data$TF_peak.connectionType))
4396 4401
   
4397 4402
 }
4398 4403
 
... ...
@@ -4577,18 +4582,18 @@ getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) {
4577 4582
   
4578 4583
   if (is.null(GRN@visualization$metadata) | forceRerun) {
4579 4584
     
4580
-    expMeans.m = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE) %>% dplyr::select(-ENSEMBL) %>% as.matrix()
4585
+    expMeans.m = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE) %>% dplyr::select(-.data$ENSEMBL) %>% as.matrix()
4581 4586
     
4582 4587
     baseMean = rowMeans(expMeans.m)
4583 4588
     expression.df = tibble::tibble(ENSEMBL_ID = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE)$ENSEMBL, baseMean = baseMean) %>%
4584
-      dplyr::mutate(ENSEMBL_ID = gsub("\\..+", "", ENSEMBL_ID, perl = TRUE),
4589
+      dplyr::mutate(ENSEMBL_ID = gsub("\\..+", "", .data$ENSEMBL_ID, perl = TRUE),
4585 4590
                     baseMean_log = log2(baseMean + 0.01))
4586 4591
     
4587
-    expression_TF.df = dplyr::filter(expression.df, ENSEMBL_ID %in% GRN@data$TFs$translationTable$ENSEMBL) %>%
4592
+    expression_TF.df = dplyr::filter(expression.df, .data$ENSEMBL_ID %in% GRN@data$TFs$translationTable$ENSEMBL) %>%
4588 4593
       dplyr::left_join(GRN@data$TFs$translationTable, by = c("ENSEMBL_ID" = "ENSEMBL"))
4589 4594
     
4590 4595
     meanPeaks.df = tibble::tibble(peakID = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID, 
4591
-                                  mean = rowMeans(dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -peakID))) %>%
4596
+                                  mean = rowMeans(dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -.data$peakID))) %>%
4592 4597
       dplyr::mutate(mean_log = log2(mean + 0.01))
4593 4598
     
4594 4599
     metadata = list("RNA_expression_genes" = expression.df,
... ...
@@ -4627,7 +4632,7 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4627 4632
 .createTables_peakGeneQC <- function(peakGeneCorrelations.all.cur, networkType_details, colors_vec, range) {
4628 4633
     
4629 4634
     d = peakGeneCorrelations.all.cur %>% 
4630
-        dplyr::group_by(r_positive, class, peak_gene.p.raw.class) %>%
4635
+        dplyr::group_by(.data$r_positive, class, .data$peak_gene.p.raw.class) %>%
4631 4636
         dplyr::summarise(n = dplyr::n()) %>%
4632 4637
         dplyr::ungroup()
4633 4638
     
... ...
@@ -4650,34 +4655,34 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4650 4655
     
4651 4656
     # Normalization factors
4652 4657
     dsum = d %>%
4653
-        dplyr::group_by(r_positive, class) %>%
4658
+        dplyr::group_by(.data$r_positive, .data$class) %>%
4654 4659
         dplyr::summarise(sum_n = sum(.data$n))
4655 4660
     
4656 4661
     
4657 4662
     # Summarize per bin
4658 4663
     d2 = d %>%
4659
-        dplyr::group_by(class, peak_gene.p.raw.class)%>%
4660
-        dplyr::summarise(sum_pos = .data$n[r_positive],
4661
-                         sum_neg = .data$n[!r_positive]) %>%
4662
-        dplyr::mutate(ratio_pos_raw = sum_pos / sum_neg,
4663
-                      enrichment_pos = sum_pos / (sum_pos + sum_neg),
4664
-                      ratio_neg = 1 - enrichment_pos) %>%
4664
+        dplyr::group_by(class, .data$peak_gene.p.raw.class)%>%
4665
+        dplyr::summarise(sum_pos = .data$n[.data$r_positive],
4666
+                         sum_neg = .data$n[!.data$r_positive]) %>%
4667
+        dplyr::mutate(ratio_pos_raw = .data$sum_pos / .data$sum_neg,
4668
+                      enrichment_pos = .data$sum_pos / (.data$sum_pos + .data$sum_neg),
4669
+                      ratio_neg = 1 - .data$enrichment_pos) %>%
4665 4670
         dplyr::ungroup()
4666 4671
     
4667 4672
     # Compare between real and permuted
4668
-    normFactor_real = dplyr::filter(dsum, class ==  !! (networkType_details[1])) %>%  dplyr::pull(sum_n) %>% sum() /
4669
-        dplyr::filter(dsum, class ==  !! (networkType_details[2])) %>%  dplyr::pull(sum_n) %>% sum()
4673
+    normFactor_real = dplyr::filter(dsum, class ==  !! (networkType_details[1])) %>%  dplyr::pull(.data$sum_n) %>% sum() /
4674
+        dplyr::filter(dsum, class ==  !! (networkType_details[2])) %>%  dplyr::pull(.data$sum_n) %>% sum()
4670 4675
     
4671 4676
     # ratio_norm not used currently, no normalization necessary here or not even useful because we dont want to normalize the r_pos and r_neg ratios: These are signal in a way. Only when comparing between real and permuted, we have to account for sample size for corrections
4672 4677
     d3 = d %>%
4673
-        dplyr::group_by(peak_gene.p.raw.class, r_positive) %>%
4678
+        dplyr::group_by(.data$peak_gene.p.raw.class, .data$r_positive) %>%
4674 4679
         dplyr::summarise(n_real     = .data$n[class == !! (names(colors_vec)[1]) ],
4675 4680
                          n_permuted = .data$n[class == !! (names(colors_vec)[2]) ]) %>%
4676 4681
         dplyr::ungroup() %>%
4677
-        dplyr::mutate(ratio_real_raw = n_real / n_permuted,
4678
-                      ratio_real_norm = ratio_real_raw / normFactor_real,
4679
-                      enrichment_real      = n_real / (n_real + n_permuted),
4680
-                      enrichment_real_norm = (n_real / normFactor_real) / ((n_real / normFactor_real) + n_permuted)) 
4682
+        dplyr::mutate(ratio_real_raw = .data$n_real / .data$n_permuted,
4683
+                      ratio_real_norm = .data$ratio_real_raw / normFactor_real,
4684
+                      enrichment_real      = .data$n_real / (.data$n_real + .data$n_permuted),
4685
+                      enrichment_real_norm = (.data$n_real / normFactor_real) / ((.data$n_real / normFactor_real) + .data$n_permuted)) 
4681 4686
     
4682 4687
     
4683 4688
     stopifnot(identical(levels(d2$peak_gene.p.raw.class), levels(d3$peak_gene.p.raw.class)))
... ...
@@ -4688,8 +4693,8 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4688 4693
                                ratio = c(d2$ratio_pos_raw, d3$ratio_real_norm),
4689 4694
                                classAll = c(as.character(d2$class), d3$r_positive),
4690 4695
                                set = c(d2$set, d3$set)) %>%
4691
-        dplyr::mutate(classAll = factor(classAll, levels=c(paste0("real_",range), paste0("random_",range), "TRUE", "FALSE")),
4692
-                      peak_gene.p.raw.class = factor(peak_gene.p.raw.class, levels = levels(d2$peak_gene.p.raw.class)))
4696
+        dplyr::mutate(classAll = factor(.data$classAll, levels=c(paste0("real_",range), paste0("random_",range), "TRUE", "FALSE")),
4697
+                      peak_gene.p.raw.class = factor(.data$peak_gene.p.raw.class, levels = levels(d2$peak_gene.p.raw.class)))
4693 4698
     
4694 4699
     d4 = tibble::tibble(peak_gene.p.raw.class = unique(d$peak_gene.p.raw.class), 
4695 4700
                         n_rpos_real = NA_integer_, n_rpos_random = NA_integer_,
... ...
@@ -4713,13 +4718,13 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4713 4718
     }
4714 4719
     
4715 4720
     d4 = d4 %>%
4716
-        dplyr::mutate(ratio_rneg_rpos_real = n_rneg_real / (n_rneg_real + n_rpos_real),
4717
-                      ratio_rneg_rpos_random = n_rneg_random / (n_rneg_random + n_rpos_random),
4718
-                      peak_gene.p.raw.class.bin = as.numeric(peak_gene.p.raw.class)) %>%
4719
-        dplyr::arrange(peak_gene.p.raw.class.bin)
4721
+        dplyr::mutate(ratio_rneg_rpos_real = .data$n_rneg_real / (.data$n_rneg_real + .data$n_rpos_real),
4722
+                      ratio_rneg_rpos_random = .data$n_rneg_random / (.data$n_rneg_random + .data$n_rpos_random),
4723
+                      peak_gene.p.raw.class.bin = as.numeric(.data$peak_gene.p.raw.class)) %>%
4724
+        dplyr::arrange(.data$peak_gene.p.raw.class.bin)
4720 4725
     
4721 4726
     d4_melt = reshape2::melt(d4, id  = c("peak_gene.p.raw.class.bin", "peak_gene.p.raw.class")) %>%
4722
-        dplyr::filter(grepl("ratio", variable))
4727
+        dplyr::filter(grepl("ratio", .data$variable))
4723 4728
     
4724 4729
     
4725 4730
     list(d = d, d2 = d2, d3 = d3, d4 = d4, d4_melt = d4_melt, d_merged = d_merged)
... ...
@@ -191,7 +191,7 @@
191 191
 .readHOCOMOCOTable <- function(file, delim = " ") {
192 192
 
193 193
   HOCOMOCO_mapping.df = .read_tidyverse_wrapper(file, type = "delim", delim = delim, col_types = readr::cols()) %>%
194
-    dplyr::mutate(ENSEMBL = gsub("\\..+", "", ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs
194
+    dplyr::mutate(ENSEMBL = gsub("\\..+", "", .data$ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs
195 195
   
196 196
   checkmate::assertSubset(c("ENSEMBL", "HOCOID"), colnames(HOCOMOCO_mapping.df))
197 197
   
... ...
@@ -379,7 +379,7 @@
379 379
             TFs_to_change = dplyr::filter(output.global.TFs, (!!as.name(colnameClassification) == "activator" | 
380 380
                                                                   !!as.name(colnameClassification) == "repressor") & 
381 381
                                               !!as.name( colnameClassificationPVal) > !!significanceThreshold_Wilcoxon) %>%
382
-                            dplyr::pull(TF)
382
+                            dplyr::pull(.data$TF)
383 383
             
384 384
             # Filter some TFs to be undetermined
385 385
             if (length(TFs_to_change) > 0) {
... ...
@@ -496,7 +496,7 @@
496 496
   
497 497
   # Prepare the RNASeq data
498 498
   TF.specific = dplyr::left_join(HOCOMOCO_mapping, DESeq_results, by = "ENSEMBL") %>% 
499
-    dplyr::filter(!is.na(baseMean))
499
+    dplyr::filter(!is.na(.data$baseMean))
500 500
   
501 501
   if (nrow(TF.specific) == 0) {
502 502
     message = "The Ensembl IDs from the translation table do not match with the IDs from the RNA-seq counts table."
... ...
@@ -510,7 +510,7 @@
510 510
     output.global.TFs.merged = output.global.TFs %>%
511 511
       dplyr::filter(!!as.name(colnameClassificationCur) != "not-expressed")  %>%
512 512
       dplyr::full_join(TF.specific, by = c( "TF" = "HOCOID"))  %>%
513
-      dplyr::mutate(baseMeanNorm = (baseMean - min(baseMean, na.rm = TRUE)) / (max(baseMean, na.rm = TRUE) - min(baseMean, na.rm = TRUE)) + par.l$minPointSize)   %>%
513
+      dplyr::mutate(baseMeanNorm = (.data$baseMean - min(.data$baseMean, na.rm = TRUE)) / (max(.data$baseMean, na.rm = TRUE) - min(.data$baseMean, na.rm = TRUE)) + par.l$minPointSize)   %>%
514 514
       dplyr::filter(!is.na(!!as.name(colnameClassificationCur))) 
515 515
     
516 516
     
... ...
@@ -531,11 +531,11 @@
531 531
                         signif(cor.res.l[["pearson"]]$p.value,2),  "/", 
532 532
                         signif(cor.res.l[["spearman"]]$p.value,2), "\n(Pearson/Spearman, stringency: ", thresholdCur, ")")
533 533
       
534
-      g = ggplot(output.global.TFs.cur, aes(weighted_meanDifference, log2FoldChange)) + geom_point(aes(size = baseMeanNorm)) + 
535
-        geom_smooth(method = par.l$regressionMethod, color = par.l$internal$colorCategories[classificationCur]) + 
536
-        ggtitle(titleCur) + 
537
-        ylab("log2 fold-change RNA-seq") + 
538
-        theme_bw() + theme(plot.title = element_text(hjust = 0.5))
534
+      g = ggplot2::ggplot(output.global.TFs.cur, ggplot2::aes(.data$weighted_meanDifference, .data$log2FoldChange)) + ggplot2::geom_point(ggplot2::aes(size = .data$baseMeanNorm)) + 
535
+          ggplot2::geom_smooth(method = par.l$regressionMethod, color = par.l$internal$colorCategories[classificationCur]) + 
536
+          ggplot2::ggtitle(titleCur) + 
537
+          ggplot2::ylab("log2 fold-change RNA-seq") + 
538
+          ggplot2::theme_bw() + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
539 539
       plot(g)
540 540
       
541 541
     }
... ...
@@ -583,7 +583,7 @@
583 583
         
584 584
         plot(median.cor.tfs.non, seq_len(nTF),
585 585
              xlim = xlim, ylim = ylim, main=mainCur, xlab=xlab, ylab=ylab,
586
-             col = grDevices::adjustcolor("darkgrey",alpha=1), pch = 16, cex = 0.5,axes = FALSE)
586
+             col = grDevices::adjustcolor("darkgrey",alpha.f=1), pch = 16, cex = 0.5, axes = FALSE)
587 587
         points(median.cor.tfs, seq_len(nTF),
588 588
                pch=16,  cex = 0.5, 
589 589
                col= dplyr::case_when(median.cor.tfs > thresCur.v[2] ~ par.l$internal$colorCategories["activator"],
... ...
@@ -640,7 +640,7 @@
640 640
     
641 641
     missingGenes = which(!HOCOMOCO_mapping.df.exp$HOCOID %in% colnames(sort.cor.m))
642 642
     if (length(missingGenes) > 0) {
643
-        HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df.exp, HOCOID %in% colnames(sort.cor.m))
643
+        HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df.exp, .data$HOCOID %in% colnames(sort.cor.m))
644 644
     }
645 645
     
646 646
     if (!is.null(file)) {
... ...
@@ -36,22 +36,22 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
36 36
     # If set to TRUE, the new default, self-loops can happen and the graph is not strictly tripartite anymore
37 37
     if (model_TF_gene_nodes_separately) {
38 38
       TF_peak.df = GRN@connections$all.filtered[["0"]] %>%
39
-        dplyr::filter(!is.na(gene.ENSEMBL)) %>% 
39
+        dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% 
40 40
         dplyr::select(c("TF.name", "peak.ID", "TF.ENSEMBL", "TF_peak.r")) %>%
41 41
         stats::na.omit() %>% 
42 42
         dplyr::mutate(V2_name = NA) %>%
43 43
         unique() %>%
44
-        dplyr::rename(V1 = TF.name, V2 = peak.ID, V1_name = TF.ENSEMBL, r = TF_peak.r) %>%
44
+        dplyr::rename(V1 = .data$TF.name, V2 = .data$peak.ID, V1_name = .data$TF.ENSEMBL, r = .data$TF_peak.r) %>%
45 45
         dplyr::mutate_at(c("V1","V2"), as.vector)
46 46
     } else {
47 47
       # Get Ensembl ID for TFs here to make a clean join and force a TF that is regulated by a peak to be the same node
48 48
       TF_peak.df = GRN@connections$all.filtered[["0"]] %>%
49
-        dplyr::filter(!is.na(gene.ENSEMBL)) %>% 
49
+        dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% 
50 50
         dplyr::select(c("TF.ENSEMBL", "peak.ID", "TF.name", "TF_peak.r")) %>%
51 51
         stats::na.omit() %>% 
52 52
         dplyr::mutate(V2_name = NA) %>%
53 53
         unique() %>%
54
-        dplyr::rename(V1 = TF.ENSEMBL, V2 = peak.ID, V1_name = TF.name, r = TF_peak.r) %>%
54
+        dplyr::rename(V1 = .data$TF.ENSEMBL, V2 = .data$peak.ID, V1_name = .data$TF.name, r = .data$TF_peak.r) %>%
55 55
         dplyr::mutate_at(c("V1","V2"), as.vector)
56 56
     }
57 57
     
... ...
@@ -60,15 +60,15 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
60 60
       stats::na.omit() %>% 
61 61
       dplyr::mutate(V1_name = NA) %>%
62 62
       unique() %>%
63
-      dplyr::rename(V1 = peak.ID, V2 = gene.ENSEMBL, V2_name = gene.name, r = peak_gene.r) %>%
63
+      dplyr::rename(V1 = .data$peak.ID, V2 = .data$gene.ENSEMBL, V2_name = .data$gene.name, r = .data$peak_gene.r) %>%
64 64
       dplyr::mutate_at(c("V1","V2"), as.vector)
65 65
     
66 66
     TF_peak_gene.df = dplyr::bind_rows(list(`tf-peak`=TF_peak.df, `peak-gene` = peak_gene.df), .id = "connectionType") %>%
67
-      dplyr::select(V1, V2, V1_name, V2_name, r, connectionType)
67
+      dplyr::select(.data$V1, .data$V2, .data$V1_name, .data$V2_name, .data$r, .data$connectionType)
68 68
     
69 69
     TF_gene.df = dplyr::inner_join(TF_peak.df, peak_gene.df, by = c("V2"="V1"), suffix = c(".TF_peak", ".peak_gene")) %>% 
70
-      dplyr::select(V1, V2.peak_gene, V1_name.TF_peak, V2_name.peak_gene) %>%
71
-      dplyr::rename(V1_name = V1_name.TF_peak, V2 = V2.peak_gene, V2_name = V2_name.peak_gene) %>%
70
+      dplyr::select(.data$V1, .data$V2.peak_gene, .data$V1_name.TF_peak, .data$V2_name.peak_gene) %>%
71
+      dplyr::rename(V1_name = .data$V1_name.TF_peak, V2 = .data$V2.peak_gene, V2_name = .data$V2_name.peak_gene) %>%
72 72
       dplyr::distinct() %>%
73 73
       dplyr::mutate(connectionType = "tf-gene") 
74 74
     
... ...
@@ -105,23 +105,23 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
105 105
 .buildGraph <- function(df, directed, allowLoops, removeMultiple = FALSE, silent = FALSE) {
106 106
   
107 107
   # Remove V1_name and V2_name as igraph treats additional columns as edge attribute, which can become messed up as it here refers to vertice attribute
108
-  df_mod = df %>% dplyr::select(-V1_name, -V2_name)
108
+  df_mod = df %>% dplyr::select(-.data$V1_name, -.data$V2_name)
109 109
   
110 110
   TF_vertices = df %>%
111
-    dplyr::select(V1, V1_name) %>% 
112
-    dplyr::rename(nodeID = V1) %>%
111
+    dplyr::select(.data$V1, .data$V1_name) %>% 
112
+    dplyr::rename(nodeID = .data$V1) %>%
113 113
     dplyr::distinct() %>%
114
-    dplyr::group_by(nodeID) %>%
115
-    dplyr::summarise(names_TF_all = paste0(V1_name, collapse="|"),
114
+    dplyr::group_by(.data$nodeID) %>%
115
+    dplyr::summarise(names_TF_all = paste0(.data$V1_name, collapse="|"),
116 116
                      nTF = dplyr::n(),
117 117
                      isTF = TRUE, .groups = "keep") %>%
118 118
     dplyr::ungroup()
119 119
   
120 120
   gene_vertices = df %>%
121
-    dplyr::select(V2, V2_name) %>% 
121
+    dplyr::select(.data$V2, .data$V2_name) %>% 
122 122
     dplyr::distinct() %>%
123 123
     dplyr::mutate(isGene = TRUE) %>%
124
-    dplyr::rename(names_gene = V2_name, nodeID = V2) %>%
124
+    dplyr::rename(names_gene = .data$V2_name, nodeID = .data$V2) %>%
125 125
     dplyr::ungroup()
126 126
   
127 127
   # Combine vertex metadata
... ...
@@ -147,7 +147,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
147 147
     
148 148
   }
149 149
   
150
-  .printGraphSummary(GRN, graph, silent = silent)
150
+  .printGraphSummary(graph, silent = silent)
151 151
   
152 152
   if (!silent) futile.logger::flog.info(paste0(" Done. Graphs are saved in GRN@graph"))
153 153
   
... ...
@@ -157,9 +157,9 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
157 157
 .printLoopsGraph <- function(graph_table, silent = FALSE) {
158 158
   
159 159
   loop_vertices = graph_table %>%
160
-    dplyr::filter(V1 == V2) %>%
161
-    dplyr::mutate(V1_name_combined = paste0(V1, " (", V1_name, ")")) %>%
162
-    dplyr::pull(V1_name_combined)
160
+    dplyr::filter(.data$V1 == .data$V2) %>%
161
+    dplyr::mutate(V1_name_combined = paste0(.data$V1, " (", .data$V1_name, ")")) %>%
162
+    dplyr::pull(.data$V1_name_combined)
163 163
   
164 164
   if (length(loop_vertices) > 0) {
165 165
     if (!silent) futile.logger::flog.info(paste0(" The following nodes / vertices have loop edges (TF regulating itself):\n", paste0(loop_vertices, collapse = ", ")))
... ...
@@ -171,7 +171,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
171 171
   
172 172
   
173 173
   multipleEdges = graph_table %>%
174
-    dplyr::group_by(V1,V2) %>% 
174
+    dplyr::group_by(.data$V1, .data$V2) %>% 
175 175
     dplyr::summarize(n = dplyr::n(), .groups = "keep") %>% 
176 176
     dplyr::ungroup() %>%
177 177
     dplyr::filter(.data$n>1)
... ...
@@ -183,7 +183,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE,
183 183
 }
184 184
 
185 185
 
186
-.printGraphSummary <- function(GRN, graph, silent = FALSE) {
186
+.printGraphSummary <- function(graph, silent = FALSE) {
187 187
   if (!silent) futile.logger::flog.info(paste0(" Graph summary:"))
188 188
   nVertices = length(igraph::V(graph))
189 189
   nEdges = length(igraph::E(graph))
... ...
@@ -275,28 +275,41 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"),
275 275
 
276 276
 
277 277
 # Retrieve set of background genes (as vector) used for enrichment analyses from a GRN object
278
-.getBackgroundGenes <-function(GRN, type = "neighborhood") {
278
+.getBackgroundGenes <-function(GRN, type = "neighborhood", gene.types = "all") {
279 279
   
280
-  checkmate::assertChoice(type, c("all_annotated", "all_RNA", "neighborhood"), empty.ok = FALSE)
280
+  checkmate::assertChoice(type, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
281 281
   
282 282
   if (type == "all_annotated") {
283 283
     
284
-    backgroundGenes = geneAnnotation[[GRN@config$parameters$genomeAssembly]]$gene.ENSEMBL
285
-    
284
+    backgroundGenes = GRN@annotation$genes$gene.ENSEMBL
285
+
286 286
   } else if (type == "all_RNA") {
287 287
     
288
-    if (checkmate::testClass(GRN@data$RNA$counts_orig, "DESeqDataSet")) {
289
-      backgroundGenes = rownames(GRN@data$RNA$counts_orig)
290
-    } else {
291
-      backgroundGenes = GRN@data$RNA$counts_orig$ENSEMBL
292
-    }
288
+      backgroundGenes = GRN@data$RNA$counts_norm.l[["0"]] %>%
289
+          dplyr::pull(.data$ENSEMBL)
293 290
     
294 291
     
292
+  } else if (type == "all_RNA_filtered") {
293
+      
294
+      backgroundGenes = GRN@data$RNA$counts_norm.l[["0"]] %>%
295
+          dplyr::filter(!.data$isFiltered) %>%
296
+          dplyr::pull(.data$ENSEMBL)
297
+      
298
+      
295 299
   } else if (type == "neighborhood") {
296 300
     
297 301
     # Retrieve only those who are in the neighborhood of genes
298 302
     backgroundGenes = levels(GRN@connections$peak_genes[["0"]]$gene.ENSEMBL)
299 303
   }
304
+    
305
+   
306
+  # Filter genes by gene.type.  
307
+  if (gene.types != "all") {
308
+      backgroundGenes = dplyr::filter(GRN@annotation$genes, 
309
+                                      .data$gene.ENSEMBL %in% backgroundGenes,
310
+                                      .data$gene.type %in% gene.types) %>%
311
+                        dplyr::pull(.data$gene.ENSEMBL)
312
+  }
300 313
   
301 314
   
302 315
   backgroundGenes 
... ...
@@ -304,6 +317,7 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"),
304 317
 }
305 318
 
306 319
 
320
+
307 321
 #' Run an enrichment analysis for the genes in the whole network in the filtered \code{\linkS4class{GRN}} object
308 322
 #' 
309 323
 #' The enrichment analysis is based on the whole network, see \code{\link{calculateCommunitiesEnrichment}} and \code{\link{calculateTFEnrichment}} for 
... ...
@@ -323,7 +337,10 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"),
323 337
 #' As they are listed under \code{Suggests}, they may not yet be installed, and the function will throw an error if they are missing.
324 338
 #' @param algorithm Character. Default \code{"weight01"}. One of: \code{"classic"}, \code{"elim"}, \code{"weight"}, \code{"weight01"}, \code{"lea"}, \code{"parentchild"}. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), ignored otherwise. Name of the algorithm that handles the GO graph structures. Valid inputs are those supported by the \code{topGO} library.
325 339
 #' @param statistic Character. Default \code{"fisher"}. One of: \code{"fisher"}, \code{"ks"}, \code{"t"}, \code{"globaltest"}, \code{"sum"}, \code{"ks.ties"}. Statistical test to be used. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), and valid inputs are those supported by the topGO library, ignored otherwise. For the other ontologies the test statistic is always Fisher. 
326
-#' @param background Character. Default \code{"neighborhood"}. One of: \code{"all_annotated"}, \code{"all_RNA"}, \code{"neighborhood"}. Set of genes to be used to construct the background for the enrichment analysis. This can either be all annotated genes in the reference genome (all_annotated), all differentially expressed genes (all_RNA), or all the genes that are within the neighborhood of a peak in the GRN (neighborhood)
340
+#' @param background Character. Default \code{"neighborhood"}. One of: \code{"all_annotated"}, \code{"all_RNA"}, \code{"all_RNA_filtered"}, \code{"neighborhood"}. Set of genes to be used to construct the background for the enrichment analysis. This can either be all annotated genes in the reference genome (\code{all_annotated}), all genes from the provided RNA data (\code{all_RNA}), all genes from the provided RNA data excluding those marked as filtered after executing \code{filterData} (\code{all_RNA_filtered}), or all the genes that are within the neighborhood of any peak (before applying any filters except for the user-defined \code{promoterRange} value in \code{addConnections_peak_gene}) (\code{neighborhood}).
341
+#' @param background_geneTypes Character vector of gene types that should be considered for the background. Default \code{"all"}. 
342
+#' Only gene types as defined in the \code{\linkS4class{GRN}} object, slot \code{GRN@annotation$genes$gene.type} are allowed. 
343
+#' The special keyword \code{"all"} means no filter on gene type.
327 344
 #' @param pAdjustMethod Character. Default \code{"BH"}. One of: \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, \code{"BY"}, \code{"fdr"}. This parameter is only relevant for the following ontologies: KEGG, DO, Reactome. For the other ontologies, the algorithm serves as an adjustment.
328 345
 #' @template forceRerun
329 346
 #' @return An updated \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$general} slot.
... ...
@@ -336,13 +353,10 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"),
336 353
 #' GRN =  loadExampleObject()
337 354
 #' GRN =  calculateGeneralEnrichment(GRN, ontology = "GO_BP", forceRerun = FALSE)
338 355
 #' @export
339
-# #' @importFrom topGO whichAlgorithms whichTests
340
-
341
-# Deactivated temporarily
342
-# @import BiocManager
343 356
 calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), 
344 357
                                        algorithm = "weight01", statistic = "fisher",
345
-                                       background = "neighborhood",  pAdjustMethod = "BH", forceRerun = FALSE) {
358
+                                       background = "neighborhood",  background_geneTypes = "all",
359
+                                       pAdjustMethod = "BH", forceRerun = FALSE) {
346 360
   
347 361
   start = Sys.time()
348 362
   checkmate::assertClass(GRN, "GRN")
... ...
@@ -352,7 +366,8 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
352 366
   
353 367
   .checkPackage_topGO_and_arguments(ontology, algorithm, statistic)
354 368
 
355
-  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "neighborhood"))
369
+  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
370
+  checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
356 371
   checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"))
357 372
   checkmate::assertFlag(forceRerun)
358 373
   
... ...
@@ -360,7 +375,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
360 375
   
361 376
   
362 377
   mapping = .getGenomeObject(GRN@config$parameters$genomeAssembly, type = "packageName")
363
-  backgroundGenes = .getBackgroundGenes(GRN, type = background)
378
+  backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes)
364 379
   
365 380
   futile.logger::flog.info(paste0("Calculating general enrichment statistics. This may take a while"))
366 381
   
... ...
@@ -375,7 +390,8 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
375 390
       # run general enrichment analysis and store tabulated results in GRN object
376 391
       # Only use the "targets", i.e., genes as foreground because it would artificially enrich for TF terms, such as "DNA-binding" "transcription activation" type terms.
377 392
       GRN@stats[["Enrichment"]][["general"]][[ontologyCur]] =
378
-        .runEnrichment(foreground = GRN@graph$TF_gene$table$V2, 
393
+        .runEnrichment(GRN,
394
+                       foreground = GRN@graph$TF_gene$table$V2, 
379 395
                        background = backgroundGenes,
380 396
                        backgroundStr = background, 
381 397
                        ontology = ontologyCur, algorithm = algorithm, statistic = statistic,
... ...
@@ -401,25 +417,6 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
401 417
 }
402 418
 
403 419
 
404
-.getBackgroundGenes <-function(GRN, type = "neighborhood") {
405
-  # Retrieve set of background genes (as vector) used for enrichment analyses from a GRN object
406
-  checkmate::assertChoice(type, c("all_annotated", "all_RNA", "neighborhood"))
407
-  if (type == "all_annotated") {
408
-    backgroundGenes = geneAnnotation[[GRN@config$parameters$genomeAssembly]]$gene.ENSEMBL
409
-  } else if (type == "all_RNA") {
410
-    if (checkmate::testClass(GRN@data$RNA$counts_orig, "DESeqDataSet")) {
411
-      backgroundGenes = rownames(GRN@data$RNA$counts_orig)
412
-    } else {
413
-      backgroundGenes = GRN@data$RNA$counts_orig$ENSEMBL
414
-    }
415
-  } else if (type == "neighborhood") {
416
-    # Retrieve only those who are in the neighborhood of genes
417
-    backgroundGenes = levels(GRN@connections$peak_genes[["0"]]$gene.ENSEMBL)
418
-  }
419
-  backgroundGenes 
420
-}
421
-
422
-
423 420
 .combineEnrichmentResults <- function(GRN, type, ontology, p, nSignificant, display_pAdj) {
424 421
   
425 422
   if (type == "byCommunity") {
... ...
@@ -433,7 +430,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
433 430
                                           lapply(function(x) {x[[ontology]]$results}) %>%
434 431
                                           dplyr::bind_rows(.id = idMerge) %>%
435 432
                                           dplyr::select(-tidyselect::starts_with("topG")) %>%
436
-                                          dplyr::mutate(pval = as.numeric(pval)) %>%
433
+                                          dplyr::mutate(pval = as.numeric(.data$pval)) %>%
437 434
                                           tibble::as_tibble())
438 435
   
439 436
   # p-adjust only available for non-GO ontologies
... ...
@@ -454,18 +451,18 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
454 451
   
455 452
   # get enriched terms from general enrichment and make sure it is kept for the community enrichment
456 453
   enrichedTermsGeneral = enrichmentGeneral %>% 
457
-    dplyr::filter(pval <= p, Found >= nSignificant) %>% 
458
-    dplyr::pull(ID)
454
+    dplyr::filter(.data$pval <= p, .data$Found >= nSignificant) %>% 
455
+    dplyr::pull(.data$ID)
459 456
   
460 457
   enrichedTermsGrouped = resultsCombined.df %>% 
461
-    dplyr::filter(pval <= p, Found >= nSignificant) %>% 
462
-    dplyr::pull(ID)
458
+    dplyr::filter(.data$pval <= p, .data$Found >= nSignificant) %>% 
459
+    dplyr::pull(.data$ID)
463 460
   
464 461
   all.df = resultsCombined.df %>%
465 462
     rbind(enrichmentGeneral) %>%
466
-    dplyr::mutate(ID = as.factor(ID),
467
-                  pval =as.numeric(gsub(">|<", "", pval))) %>%
468
-    dplyr::filter(pval <= p & (Found >= nSignificant | ID %in% c(enrichedTermsGeneral, enrichedTermsGrouped)))
463
+    dplyr::mutate(ID = as.factor(.data$ID),
464
+                  pval =as.numeric(gsub(">|<", "", .data$pval))) %>%
465
+    dplyr::filter(.data$pval <= p & (.data$Found >= nSignificant | .data$ID %in% c(enrichedTermsGeneral, enrichedTermsGrouped)))
469 466
   
470 467
   all.df[, idMerge] = as.factor(all.df[, idMerge, drop = TRUE])
471 468
   
... ...
@@ -493,7 +490,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
493 490
 }
494 491
 
495 492
 #' @importFrom biomaRt useEnsembl getBM
496
-.runEnrichment <- function(foreground, background, backgroundStr, database = "GO", ontology, 
493
+.runEnrichment <- function(GRN, foreground, background, backgroundStr, database = "GO", ontology, 
497 494
                            description = "Enrichment Analysis",
498 495
                            algorithm="weight01", statistic = "fisher", mapping, pAdjustMethod = "BH", minGSSize = 0, maxGSSize = 5000){
499 496
   
... ...
@@ -591,8 +588,8 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
591 588
     # Dont trim GO terms here, happens later when plotting
592 589
     result.tbl = unique(topGO::GenTable(go_enrichment, pval = result, orderBy = "pval", numChar = 1000, 
593 590
                                         topNodes = length(topGO::score(result))) ) %>%
594
-      dplyr::rename(ID = GO.ID, Found = Significant)  %>%      # make it more clear what Significant refers to here
595
-      dplyr::mutate(GeneRatio = Found / nForeground)
591
+      dplyr::rename(ID = .data$GO.ID, Found = .data$Significant)  %>%      # make it more clear what Significant refers to here
592
+      dplyr::mutate(GeneRatio = .data$Found / nForeground)
596 593
     
597 594
     
598 595
     result.list[["results"]] = result.tbl
... ...
@@ -720,7 +717,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
720 717
   if (!is.null(enrichmentObj)) {
721 718
     
722 719
     result.tbl = enrichmentObj@result %>%
723
-      dplyr::rename(Term = Description, pval = .data$pvalue, Found = Count) %>%
720
+      dplyr::rename(Term = .data$Description, pval = .data$pvalue, Found = .data$Count) %>%
724 721
       dplyr::mutate(GeneRatio = sapply(parse (text = enrichmentObj@result$GeneRatio), eval))
725 722
     
726 723
   } else {
... ...
@@ -867,7 +864,8 @@ calculateCommunitiesStats <- function(GRN, clustering = "louvain", forceRerun =
867 864
 # #' @importFrom topGO whichAlgorithms whichTests
868 865
 calculateCommunitiesEnrichment <- function(GRN, 
869 866
                                            ontology = c("GO_BP", "GO_MF"), algorithm = "weight01", 
870
-                                           statistic = "fisher", background = "neighborhood", 
867
+                                           statistic = "fisher", 
868
+                                           background = "neighborhood", background_geneTypes = "all",
871 869
                                            selection = "byRank", communities = seq_len(10),
872 870
                                            pAdjustMethod = "BH",
873 871
                                            forceRerun = FALSE) {
... ...
@@ -881,7 +879,8 @@ calculateCommunitiesEnrichment <- function(GRN,
881 879
  
882 880
   .checkPackage_topGO_and_arguments(ontology, algorithm, statistic)
883 881
   
884
-  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "neighborhood"))
882
+  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
883
+  checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
885 884
   checkmate::assertChoice(selection, c("byRank", "byLabel"))
886 885
   checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"))
887 886
   checkmate::assertFlag(forceRerun)
... ...
@@ -914,7 +913,7 @@ calculateCommunitiesEnrichment <- function(GRN,
914 913
   
915 914
   
916 915
   mapping = .getGenomeObject(GRN@config$parameters$genomeAssembly, type = "packageName")
917
-  backgroundGenes = .getBackgroundGenes(GRN, type = background)
916
+  backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes)
918 917
   
919 918
   if (is.null(GRN@stats[["Enrichment"]][["byCommunity"]])) {
920 919
     GRN@stats[["Enrichment"]][["byCommunity"]] = list()
... ...
@@ -930,15 +929,16 @@ calculateCommunitiesEnrichment <- function(GRN,
930 929
     
931 930
     
932 931
     foregroundCur = vertexMetadata %>%
933
-      dplyr::filter(community == communityCur) %>%
934
-      dplyr::pull(name)
932
+      dplyr::filter(.data$community == communityCur) %>%
933
+      dplyr::pull(.data$name)
935 934
     
936 935
     for (ontologyCur in ontology) {
937 936
       
938 937
       if (is.null(GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]][[ontologyCur]]) | forceRerun) {
939 938
         
940 939
         GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]][[ontologyCur]] = 
941
-          .runEnrichment(foreground = foregroundCur, 
940
+          .runEnrichment(GRN,
941
+                         foreground = foregroundCur, 
942 942
                          background = backgroundGenes, 
943 943
                          backgroundStr = background,
944 944
                          ontology = ontologyCur, 
... ...
@@ -967,7 +967,7 @@ calculateCommunitiesEnrichment <- function(GRN,
967 967
   
968 968
   df = igraph::vertex.attributes(GRN@graph[[graph]]$graph) %>%
969 969
     as.data.frame() %>% 
970
-    dplyr::count(community)
970
+    dplyr::count(.data$community)
971 971
   
972 972
   
973 973
   if (is.null(communities)) {
... ...
@@ -976,7 +976,7 @@ calculateCommunitiesEnrichment <- function(GRN,
976 976
   selCommunities = df %>% 
977 977
     dplyr::arrange(dplyr::desc(.data$n)) %>%
978 978
     dplyr::slice(communities) %>%
979
-    dplyr::pull(community) %>%
979
+    dplyr::pull(.data$community) %>%
980 980
     as.character()
981 981
   
982 982
   if (length(selCommunities) == 0) {
... ...
@@ -1049,22 +1049,22 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network =
1049 1049
   if (rankType == "degree"){
1050 1050
     col = dplyr::if_else(nodeType == "gene", "V2", "V1")
1051 1051
     topNodes = graph.df %>%
1052
-      dplyr::filter(connectionType == link) %>%
1052
+      dplyr::filter(.data$connectionType == link) %>%
1053 1053
       dplyr::count(!!as.name(col), sort = TRUE) %>%
1054 1054
       # dplyr::rename(!!slot := V1, Connections = n) %>%
1055 1055
       dplyr::rename(Connections = n) %>%
1056
-      dplyr::arrange(dplyr::desc(Connections)) %>%
1056
+      dplyr::arrange(dplyr::desc(.data$Connections)) %>%
1057 1057
       dplyr::slice(seq_len(top.n)) 
1058 1058
     
1059 1059
     # TODO: change column names
1060 1060
     if (nodeType == "gene") {
1061 1061
       topNodes = topNodes  %>%
1062
-        dplyr::left_join(graph.df %>% dplyr::select(V2, V2_name) %>% dplyr::distinct(), by = "V2") %>%
1063
-        dplyr::rename(gene.ENSEMBL = V2, gene.name = V2_name)
1062
+        dplyr::left_join(graph.df %>% dplyr::select(.data$V2, .data$V2_name) %>% dplyr::distinct(), by = "V2") %>%
1063
+        dplyr::rename(gene.ENSEMBL = .data$V2, gene.name = .data$V2_name)
1064 1064
     } else {
1065 1065
       topNodes = topNodes  %>%
1066
-        dplyr::left_join(graph.df %>% dplyr::select(V1, V1_name) %>% dplyr::distinct(), by = "V1") %>%
1067
-        dplyr::rename(TF.ENSEMBL = V1, TF.name = V1_name)
1066
+        dplyr::left_join(graph.df %>% dplyr::select(.data$V1, .data$V1_name) %>% dplyr::distinct(), by = "V1") %>%
1067
+        dplyr::rename(TF.ENSEMBL = .data$V1, TF.name = .data$V1_name)
1068 1068
     }
1069 1069
     
1070 1070
     
... ...
@@ -1076,7 +1076,7 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network =
1076 1076
   
1077 1077
   # Remove unnecessary extra column
1078 1078
   if ("name_plot" %in% colnames(topNodes)) {
1079
-      topNodes = dplyr::select(topNodes, -name_plot)
1079
+      topNodes = dplyr::select(topNodes, -.data$name_plot)
1080 1080
   }
1081 1081
 
1082 1082
   .printExecutionTime(start)
... ...
@@ -1108,7 +1108,8 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network =
1108 1108
 # #' @importFrom topGO whichAlgorithms whichTests
1109 1109
 calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NULL,
1110 1110
                                   ontology = c("GO_BP", "GO_MF"), algorithm = "weight01", 
1111
-                                  statistic = "fisher", background = "neighborhood",
1111
+                                  statistic = "fisher", 
1112
+                                  background = "neighborhood", background_geneTypes = "all",
1112 1113
                                   pAdjustMethod = "BH",
1113 1114
                                   forceRerun = FALSE){
1114 1115
   
... ...
@@ -1122,7 +1123,8 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU
1122 1123
   
1123 1124
   .checkPackage_topGO_and_arguments(ontology, algorithm, statistic)
1124 1125
 
1125
-  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "neighborhood"))
1126
+  checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood"))
1127
+  checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
1126 1128
   checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"))
1127 1129
   checkmate::assertFlag(forceRerun)
1128 1130
   
... ...
@@ -1138,7 +1140,7 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU
1138 1140
   } else{
1139 1141
     
1140 1142
     # TF.name is always there, irrespective of whether ENSEMBL ID or TF name is used as primary ID type
1141
-    TFset = getTopNodes(GRN, nodeType = "TF", rankType = rankType, n, use_TF_gene_network = TRUE) %>% dplyr::pull(TF.name)
1143
+    TFset = getTopNodes(GRN, nodeType = "TF", rankType = rankType, n, use_TF_gene_network = TRUE) %>% dplyr::pull(.data$TF.name)
1142 1144
   }
1143 1145
   
1144 1146
   # TODO: Continue working on the TF.name level or switch to Ensembl? Should be in concordance with the graph!
... ...
@@ -1165,12 +1167,12 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU
1165 1167
     
1166 1168
     # get the genes associated with current top TF
1167 1169
     curGenes = GRN@connections$all.filtered$`0` %>% 
1168
-      dplyr::filter(TF.name == TF) %>% 
1169
-      dplyr::pull(gene.ENSEMBL) %>%
1170
+      dplyr::filter(.data$TF.name == TF) %>% 
1171
+      dplyr::pull(.data$gene.ENSEMBL) %>%
1170 1172
       unique()
1171 1173
     
1172 1174
     
1173
-    backgroundGenes = .getBackgroundGenes(GRN, type = background)
1175
+    backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes)
1174 1176
     
1175 1177
     for (ontologyCur in ontology) {
1176 1178
       
... ...
@@ -1184,7 +1186,8 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU
1184 1186
         
1185 1187
         
1186 1188
         GRN@stats[["Enrichment"]][["byTF"]][[TF]][[ontologyCur]] =  
1187
-          .runEnrichment(foreground = curGenes,
1189
+          .runEnrichment(GRN,
1190
+                         foreground = curGenes,
1188 1191
                          background = backgroundGenes, 
1189 1192
                          backgroundStr = background,
1190 1193
                          ontology = ontologyCur, 
... ...
@@ -1,3 +1,6 @@
1
+# TODO: tidyselect:::where has only been properly exported in August 2022, see here: https://github.com/r-lib/tidyselect/pull/274
2
+# As a workaround, as recommended, we use this here for now:
3
+utils::globalVariables("where")
1 4
 
2 5
 ############### PCA ###############
3 6
 
... ...
@@ -18,8 +21,8 @@
18 21
 #' @return An updated \code{\linkS4class{GRN}} object. 
19 22
 #' @examples 
20 23
 #' # See the Workflow vignette on the GRaNIE website for examples
21
-#' # GRN = loadExampleObject()
22
-#' # GRN = plotPCA_all(GRN, topn = 500, type = "rna", plotAsPDF = FALSE)
24
+#' GRN = loadExampleObject()
25
+#' GRN = plotPCA_all(GRN, topn = 500, data = "rna", type = "raw", plotAsPDF = FALSE, pages = 1)
23 26
 #' @export
24 27
 plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, 
25 28
                         data = c("rna", "peaks"), topn = c(500,1000,5000), type = c("raw", "normalized"), 
... ...
@@ -178,12 +181,12 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
178 181
   } else if (transformation == "log2") {
179 182
     checkmate::assertMatrix(counts)
180 183
     counts.transf = log2(counts + 1)
181
-    metadata = GRN@data$metadata %>% dplyr::filter(sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame()
184
+    metadata = GRN@data$metadata %>% dplyr::filter(.data$sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame()
182 185
     
183 186
   } else {
184 187
     checkmate::assertMatrix(counts)
185 188
     counts.transf = counts
186
-    metadata = GRN@data$metadata %>% dplyr::filter(sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame()
189
+    metadata = GRN@data$metadata %>% dplyr::filter(.data$sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame()
187 190
   }
188 191
   
189 192
   # Override scaling when already vst-transformed
... ...
@@ -232,18 +235,18 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
232 235
     percentVar <- pca$sdev^2 / sum( pca$sdev^2 )
233 236
     
234 237
     screeplot.df = tibble::tibble(PCs = factor(paste0("PC ", seq_len(nPCS)), levels = paste0("PC ", seq_len(nPCS))), variation = percentVar[seq_len(nPCS)] * 100) %>%
235
-      dplyr::mutate(variation_sum = cumsum(variation),
238
+      dplyr::mutate(variation_sum = cumsum(.data$variation),
236 239
                     pos = seq_len(nPCS))
237 240
     
238 241
     # 1. Scree plot
239 242
     if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
240 243
       
241
-      g = ggplot(screeplot.df, aes(PCs, variation)) + geom_bar(stat = "identity") + 
242
-        xlab("Principal component (PC)") + ylab("Explained variation / Cumultative sum (in %)") + 
243
-        geom_point(data = screeplot.df, aes(pos, variation_sum), color = "red") + 
244
-        geom_line(data = screeplot.df, aes(pos, variation_sum), color = "red") +
245
-        ggtitle(paste0("Screeplot for top ", topn, " variable features")) + 
246
-        theme_bw()
244
+      g = ggplot2::ggplot(screeplot.df, ggplot2::aes(.data$PCs, .data$variation)) + ggplot2::geom_bar(stat = "identity") + 
245
+        ggplot2::xlab("Principal component (PC)") + ggplot2::ylab("Explained variation / Cumultative sum (in %)") + 
246
+        ggplot2::geom_point(data = screeplot.df, ggplot2::aes(.data$pos, .data$variation_sum), color = "red") + 
247
+        ggplot2::geom_line(data = screeplot.df, ggplot2::aes(.data$pos, .data$variation_sum), color = "red") +
248
+        ggplot2::ggtitle(paste0("Screeplot for top ", topn, " variable features")) + 
249
+        ggplot2::theme_bw()
247 250
       
248 251
       plot(g)
249 252
     }
... ...
@@ -290,24 +293,24 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
290 293
             dplyr::mutate_if(is.logical, as.factor)
291 294
           
292 295
           
293
-          g = g = ggplot(data=d, aes(x=PC1, y=PC2, color=group)) + geom_point(size=3) + 
294
-            xlab(paste0("PC1: ",round(percentVar[1] * 100, 1),"% variance")) +
295
-            ylab(paste0("PC2: ",round(percentVar[2] * 100, 1),"% variance")) +
296
-            ggtitle(plotTitle) + coord_fixed()
296
+          g = g = ggplot2::ggplot(data=d, ggplot2::aes(x=.data$PC1, y=.data$PC2, color=group)) + ggplot2::geom_point(size=3) + 
297
+            ggplot2::xlab(paste0("PC1: ",round(percentVar[1] * 100, 1),"% variance")) +
298
+            ggplot2::ylab(paste0("PC2: ",round(percentVar[2] * 100, 1),"% variance")) +
299
+            ggplot2::ggtitle(plotTitle) + ggplot2::coord_fixed()
297 300
           
298 301
           
299 302
           
300 303
           if (is.factor(d$group)) {
301
-            g = g + scale_color_viridis_d()
304
+            g = g + ggplot2::scale_color_viridis_d()
302 305
             
303 306
           } else {
304 307
             
305 308
             is.date <- function(x) inherits(x, 'Date')
306 309
             
307 310
             if (!is.date(d$group)) {
308
-              g = g + scale_color_viridis_c()
311
+              g = g + ggplot2::scale_color_viridis_c()
309 312
             } else {
310
-              g = g + scale_color_viridis_c(trans = "date")
313
+              g = g + ggplot2::scale_color_viridis_c(trans = "date")
311 314
             }
312 315
             
313 316
             
... ...
@@ -315,7 +318,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
315 318
           
316 319
           
317 320
           if (skipLegend) {
318
-            g = g + theme(legend.position = "none")
321
+            g = g + ggplot2::theme(legend.position = "none")
319 322
           } 
320 323
           
321 324
           plot(g)
... ...
@@ -346,15 +349,15 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
346 349
     data = log2(data+1)
347 350
   } 
348 351
   
349
-  data.df = reshape2::melt(tibble::as_tibble(data), measure.vars = colnames(data)) %>% dplyr::rename(sample = variable)
350
-  g = ggplot(data.df, aes(value, color = sample)) + geom_density() + theme_bw()
352
+  data.df = reshape2::melt(tibble::as_tibble(data), measure.vars = colnames(data)) %>% dplyr::rename(sample = .data$variable)
353
+  g = ggplot2::ggplot(data.df, ggplot2::aes(.data$value, color = .data$sample)) + ggplot2::geom_density() + ggplot2::theme_bw()
351 354
   
352 355
   title = paste0("Density of values per sample for all features")
353 356
   if (! legend) {
354
-    g = g + theme(legend.position = "none")
357
+    g = g + ggplot2::theme(legend.position = "none")
355 358
     title = paste0(title, " (Legend omitted, too many samples)")
356 359
   }
357
-  g = g + ggtitle(title)
360
+  g = g + ggplot2::ggtitle(title)
358 361
   
359 362
   plot(g)
360 363
   
... ...
@@ -498,7 +501,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
498 501
 #' @examples 
499 502
 #' # See the Workflow vignette on the GRaNIE website for examples
500 503
 #' GRN = loadExampleObject()
501
-#' GRN = plotDiagnosticPlots_TFPeaks(GRN, outputFolder = ".", dataType = "real", nTFMax = 2)
504
+#' GRN = plotDiagnosticPlots_TFPeaks(GRN, outputFolder = ".", dataType = "real", nTFMax = 2, pages = 1)
502 505
 #' @export
503 506
 plotDiagnosticPlots_TFPeaks <- function(GRN, 
504 507
                                         outputFolder = NULL, 
... ...
@@ -583,10 +586,10 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
583 586
 .generateTF_GC_diagnosticPlots <- function(TFCur, GC_classes_foreground.df, GC_classes_background.df, GC_classes_all.df, peaksForeground, peaksBackground, peaksBackgroundGC) {
584 587
   
585 588
   GC_classes_background_GC.df = peaksBackgroundGC %>%
586
-    dplyr::group_by(GC_class) %>%
587
-    dplyr::summarise(n=  dplyr::n(), peak_width_mean = mean(peak_width), peak_width_sd = sd(peak_width)) %>%
589
+    dplyr::group_by(.data$GC_class) %>%
590
+    dplyr::summarise(n=  dplyr::n(), peak_width_mean = mean(.data$peak_width), peak_width_sd = sd(.data$peak_width)) %>%
588 591
     dplyr::ungroup() %>% 
589
-    tidyr::complete(GC_class, fill = list(n = 0)) %>%
592
+    tidyr::complete(.data$GC_class, fill = list(n = 0)) %>%
590 593
     dplyr::mutate(n_rel = .data$n / nrow(peaksBackgroundGC), type = "background_GC")
591 594
   
592 595
   # TODO
... ...
@@ -594,7 +597,7 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
594 597
     dplyr::mutate(type = forcats::fct_relevel(.data$type, "foreground", "background_GC", "background_orig")) %>%
595 598
     dplyr::left_join(GC_classes_all.df, by = "GC_class") %>%
596 599
     dplyr::rowwise() %>%
597
-    dplyr::mutate(n.bg.needed.relFreq = dplyr::if_else(round(n.bg.needed.perc * 100, 0) > 100, "", paste0(as.character(round(n.bg.needed.perc * 100, 0)),"%")))
600
+    dplyr::mutate(n.bg.needed.relFreq = dplyr::if_else(round(.data$n.bg.needed.perc * 100, 0) > 100, "", paste0(as.character(round(.data$n.bg.needed.perc * 100, 0)),"%")))
598 601
   
599 602
   # Current hack: Put those numbers where the no. of peaks in the GC background < the required one (i.e., where resampling occured)
600 603
   GC_classes_all2.df$n.bg.needed.relFreq[GC_classes_all2.df$type != "background_GC" | GC_classes_all2.df$n == 0] = ""
... ...
@@ -606,28 +609,28 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
606 609
   labelVec = c("background_orig" = label_background_orig, "background_GC" = label_background_GC, "foreground" = label_foreground)
607 610
   colorVec = c("background_orig" = "lightgray", "background_GC" = "darkgray", "foreground" = "black")
608 611
   
609
-  g1 = ggplot(GC_classes_all2.df , aes(GC_class, n_rel, group = .data$type, fill = .data$type, label = n.bg.needed.relFreq)) + 
610
-    geom_bar(stat = "identity", position = position_dodge(width = 0.5), width = 0.5) + 
611
-    geom_text(vjust=-0.5, size = 3)  + 
612
-    ylim(0,0.75) + 
613
-    #geom_line(aes(color= type), size = 2) +
614
-    scale_fill_manual(name = "Peakset origin", labels  = labelVec, values = colorVec) + 
615
-    #scale_color_manual(name = "Signal", labels = labelVec, values = colorVec) + 
616
-    theme_bw() + ggtitle(paste0(TFCur, ": Before and after GC-adjustment")) + 
617
-    xlab("GC class from peaks") +
618
-    ylab("Relative frequencies") +
619
-    theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) 
620
-  
621