Browse code

Major changes related to Vignette and package size reduction, part 1

Christian Arnold authored on 25/03/2022 20:47:10
Showing22 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: GRaNIE
2 2
 Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data
3
-Version: 0.99.3
3
+Version: 0.99.4
4 4
 Encoding: UTF-8
5 5
 Authors@R: c(person("Christian", "Arnold", email =
6 6
         "chrarnold@web.de", role = c("cre","aut")),
... ...
@@ -370,23 +370,51 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
370 370
 }
371 371
 
372 372
 
373
-.loadPrecompiledAnnotationData <- function(genomeAssembly) {
374
-  
375
-  futile.logger::flog.info(paste0("Reading pre-compiled genome annotation data "))
376
-  
377
-  if (is.null(geneAnnotation[[genomeAssembly]])) {
373
+.retrieveAnnotationData <- function(genomeAssembly) {
378 374
     
379
-    message = "The genome version you specified is not contained in the pre-compiled genome annotation list. Currently, only hg19, hg38 and mm10 are supported. Contact us."
380
-    .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
375
+    futile.logger::flog.info(paste0("Retrieving genome annotation data from biomaRt... This may take a while"))
376
+    
377
+    params.l = .getBiomartParameters(genomeAssembly)
378
+    
379
+    columnsToRetrieve = c("chromosome_name", "start_position", "end_position",
380
+                          "strand", "ensembl_gene_id", "gene_biotype", "external_gene_name")
381
+    
382
+    geneAnnotation <- NULL
383
+    attempt <- 0
384
+    mirrors = c('www', 'uswest', 'useast', 'asia')
385
+    while(!is.data.frame(geneAnnotation) && attempt <= 3 ) {
386
+        attempt <- attempt + 1
387
+        geneAnnotation = tryCatch({ 
388
+            ensembl = biomaRt::useEnsembl(biomart = "genes", host = params.l[["host"]], dataset = params.l[["dataset"]], mirror = mirrors[attempt])
389
+            biomaRt::getBM(attributes = columnsToRetrieve, mart = ensembl)
390
+            
391
+        }
392
+        )
393
+    } 
394
+    
395
+    
396
+    if (!is.data.frame(geneAnnotation)) {
397
+        
398
+        error_Biomart = "A temporary error occured with biomaRt::getBM or biomaRt::useEnsembl. This is often caused by an unresponsive Ensembl site. Try again at a later time. Note that this error is not caused by GRaNIE but external services."
399
+        .checkAndLogWarningsAndErrors(NULL, error_Biomart, isWarning = FALSE)
400
+        return(NULL)
401
+        
402
+    }
403
+    
404
+    geneAnnotation %>%
405
+        as_tibble() %>%
406
+        dplyr::filter(stringr::str_length(chromosome_name) <= 5) %>%
407
+        dplyr::mutate(chromosome_name = paste0("chr", chromosome_name)) %>%
408
+        dplyr::rename(gene.chr = chromosome_name, gene.start = start_position, gene.end = end_position, 
409
+                      gene.strand = strand, gene.ENSEMBL = ensembl_gene_id, gene.type = gene_biotype, gene.name = external_gene_name) %>%
410
+        dplyr::mutate_if(is.character, as.factor) %>%
411
+        dplyr::mutate(gene.strand = factor(gene.strand, levels = c(1,-1), labels = c("+", "-")))
381 412
     
382
-  } 
383
-  
384
-  geneAnnotation[[genomeAssembly]]
385 413
 }
386 414
 
387 415
 .getAllGeneTypesAndFrequencies <- function(genomeAssembly, verbose = TRUE) {
388 416
   
389
-  geneAnnotation.df = .loadPrecompiledAnnotationData(genomeAssembly)
417
+  geneAnnotation.df = .retrieveAnnotationData(genomeAssembly)
390 418
   return(table(geneAnnotation.df$gene.type))
391 419
   
392 420
 }
... ...
@@ -399,7 +427,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
399 427
     stop("Not yet implemented")
400 428
   }
401 429
   
402
-  genes.filt.df = .loadPrecompiledAnnotationData(GRN@config$parameters$genomeAssembly)
430
+  genes.filt.df = .retrieveAnnotationData(GRN@config$parameters$genomeAssembly)
403 431
   
404 432
   if (!is.null(gene.types)) {
405 433
     if (! "all" %in% gene.types) {
... ...
@@ -511,9 +539,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
511 539
   rowMedians_rna = matrixStats::rowMedians(countsRNA.m)
512 540
   CV_rna = matrixStats::rowSds(countsRNA.m) /  rowMeans_rna
513 541
   
514
-  # This object is internally loaded and lives inside the R directory within sysdata.rda
515
-  # load("R/sysdata.rda")
516
-  genomeAnnotation.df = geneAnnotation[[GRN@config$parameters$genomeAssembly]]
542
+  genomeAnnotation.df = .retrieveAnnotationData(GRN@config$parameters$genomeAssembly)
517 543
   
518 544
   metadata_rna = tibble::tibble(gene.ENSEMBL = countsRNA.clean$ENSEMBL, 
519 545
                                 gene.mean = rowMeans_rna, 
... ...
@@ -2431,11 +2457,11 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2431 2457
     end(subject.gr) = start(subject.gr)
2432 2458
   } 
2433 2459
   
2434
-  overlapsAll = GenomicRanges::findOverlaps(query, subject.gr, 
2460
+  overlapsAll = suppressWarnings(GenomicRanges::findOverlaps(query, subject.gr, 
2435 2461
                                             minoverlap=1,
2436 2462
                                             type="any",
2437 2463
                                             select="all",
2438
-                                            ignore.strand=TRUE)
2464
+                                            ignore.strand=TRUE))
2439 2465
   
2440 2466
   query_row_ids  = S4Vectors::queryHits(overlapsAll)
2441 2467
   subject_rowids = S4Vectors::subjectHits(overlapsAll)
... ...
@@ -2596,7 +2622,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = "
2596 2622
   # if a peak overlaps with a gene, should the same gene be reported as the connection?
2597 2623
   
2598 2624
   # OVERLAP OF PEAKS AND EXTENDED GENES
2599
-  overlaps.sub.df = .calculatePeakGeneOverlaps(GRN, consensusPeaks, peak.TADs.df, neighborhoodSize = neighborhoodSize, 
2625
+  overlaps.sub.df = .calculatePeakGeneOverlaps(GRN, allPeaks = consensusPeaks, peak.TADs.df, neighborhoodSize = neighborhoodSize, 
2600 2626
                                                genomeAssembly = genomeAssembly, gene.types = gene.types, overlapTypeGene = overlapTypeGene) 
2601 2627
   
2602 2628
   
... ...
@@ -4388,3 +4414,110 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4388 4414
   
4389 4415
 }
4390 4416
 
4417
+
4418
+.createTables_peakGeneQC <- function(peakGeneCorrelations.all.cur, networkType_details, colors_vec, range) {
4419
+    
4420
+    d = peakGeneCorrelations.all.cur %>% 
4421
+        dplyr::group_by(r_positive, class, peak_gene.p.raw.class) %>%
4422
+        dplyr::summarise(n = dplyr::n()) %>%
4423
+        dplyr::ungroup()
4424
+    
4425
+    # Some classes might be missing, add them here with explicit zeros
4426
+    for (r_pos in c(TRUE, FALSE)) {
4427
+        for (classCur in networkType_details){
4428
+            for (pclassCur in levels(peakGeneCorrelations.all.cur$peak_gene.p.raw.class)) {
4429
+                
4430
+                row = which(d$r_positive == r_pos & d$class == classCur & as.character(d$peak_gene.p.raw.class) == as.character(pclassCur))
4431
+                if (length(row) == 0) {
4432
+                    d = tibble::add_row(d, r_positive = r_pos, class = classCur, peak_gene.p.raw.class = pclassCur, n = 0)
4433
+                }
4434
+            }
4435
+        }
4436
+    }
4437
+    
4438
+    # Restore the "ordered" factor for class
4439
+    d$peak_gene.p.raw.class = factor(d$peak_gene.p.raw.class, ordered = TRUE, levels =  levels(peakGeneCorrelations.all.cur$peak_gene.p.raw.class))
4440
+    
4441
+    
4442
+    # Normalization factors
4443
+    dsum = d %>%
4444
+        dplyr::group_by(r_positive, class) %>%
4445
+        dplyr::summarise(sum_n = sum(.data$n))
4446
+    
4447
+    
4448
+    # Summarize per bin
4449
+    d2 = d %>%
4450
+        dplyr::group_by(class, peak_gene.p.raw.class)%>%
4451
+        dplyr::summarise(sum_pos = .data$n[r_positive],
4452
+                         sum_neg = .data$n[!r_positive]) %>%
4453
+        dplyr::mutate(ratio_pos_raw = sum_pos / sum_neg,
4454
+                      enrichment_pos = sum_pos / (sum_pos + sum_neg),
4455
+                      ratio_neg = 1 - enrichment_pos) %>%
4456
+        dplyr::ungroup()
4457
+    
4458
+    # Compare between real and permuted
4459
+    normFactor_real = dplyr::filter(dsum, class ==  !! (networkType_details[1])) %>%  dplyr::pull(sum_n) %>% sum() /
4460
+        dplyr::filter(dsum, class ==  !! (networkType_details[2])) %>%  dplyr::pull(sum_n) %>% sum()
4461
+    
4462
+    # ratio_norm not used currently, no normalization necessary here or not even useful because we dont want to normalize the r_pos and r_neg ratios: These are signal in a way. Only when comparing between real and permuted, we have to account for sample size for corrections
4463
+    d3 = d %>%
4464
+        dplyr::group_by(peak_gene.p.raw.class, r_positive) %>%
4465
+        dplyr::summarise(n_real     = .data$n[class == !! (names(colors_vec)[1]) ],
4466
+                         n_permuted = .data$n[class == !! (names(colors_vec)[2]) ]) %>%
4467
+        dplyr::ungroup() %>%
4468
+        dplyr::mutate(ratio_real_raw = n_real / n_permuted,
4469
+                      ratio_real_norm = ratio_real_raw / normFactor_real,
4470
+                      enrichment_real      = n_real / (n_real + n_permuted),
4471
+                      enrichment_real_norm = (n_real / normFactor_real) / ((n_real / normFactor_real) + n_permuted)) 
4472
+    
4473
+    
4474
+    stopifnot(identical(levels(d2$peak_gene.p.raw.class), levels(d3$peak_gene.p.raw.class)))
4475
+    # 2 enrichment bar plots but combined using facet_wrap
4476
+    d2$set = "r+ / r-"; d3$set = "real / permuted" 
4477
+    d_merged <- tibble::tibble(peak_gene.p.raw.class = c(as.character(d2$peak_gene.p.raw.class), 
4478
+                                                         as.character(d3$peak_gene.p.raw.class)),
4479
+                               ratio = c(d2$ratio_pos_raw, d3$ratio_real_norm),
4480
+                               classAll = c(as.character(d2$class), d3$r_positive),
4481
+                               set = c(d2$set, d3$set)) %>%
4482
+        dplyr::mutate(classAll = factor(classAll, levels=c(paste0("real_",range), paste0("random_",range), "TRUE", "FALSE")),
4483
+                      peak_gene.p.raw.class = factor(peak_gene.p.raw.class, levels = levels(d2$peak_gene.p.raw.class)))
4484
+    
4485
+    d4 = tibble::tibble(peak_gene.p.raw.class = unique(d$peak_gene.p.raw.class), 
4486
+                        n_rpos_real = NA_integer_, n_rpos_random = NA_integer_,
4487
+                        n_rneg_real = NA_integer_, n_rneg_random = NA_integer_,
4488
+                        ratio_permuted_real_rpos_norm = NA_real_,
4489
+                        ratio_permuted_real_rneg_norm = NA_real_)
4490
+    
4491
+    for (i in seq_len(nrow(d4))) {
4492
+        row_d2 = which(d2$class == networkType_details[1] & d2$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
4493
+        stopifnot(length(row_d2) == 1)
4494
+        d4[i, "n_rpos_real"] = d2[row_d2, "sum_pos"] %>% unlist()
4495
+        d4[i, "n_rneg_real"] = d2[row_d2, "sum_neg"] %>% unlist()
4496
+        row_d2 = which(d2$class == paste0("random_",range) & d2$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
4497
+        d4[i, "n_rpos_random"] = d2[row_d2, "sum_pos"] %>% unlist()
4498
+        d4[i, "n_rneg_random"] = d2[row_d2, "sum_neg"] %>% unlist()
4499
+        
4500
+        row_d3 = which(d3$r_positive == TRUE & d3$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
4501
+        d4[i, "ratio_permuted_real_rpos_norm"] = 1- d3[row_d3, "ratio_real_norm"] %>% unlist()
4502
+        row_d3 = which(d3$r_positive == FALSE & d3$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
4503
+        d4[i, "ratio_permuted_real_rneg_norm"] = 1- d3[row_d3, "ratio_real_norm"] %>% unlist()
4504
+    }
4505
+    
4506
+    d4 = d4 %>%
4507
+        dplyr::mutate(ratio_rneg_rpos_real = n_rneg_real / (n_rneg_real + n_rpos_real),
4508
+                      ratio_rneg_rpos_random = n_rneg_random / (n_rneg_random + n_rpos_random),
4509
+                      peak_gene.p.raw.class.bin = as.numeric(peak_gene.p.raw.class)) %>%
4510
+        dplyr::arrange(peak_gene.p.raw.class.bin)
4511
+    
4512
+    d4_melt = reshape2::melt(d4, id  = c("peak_gene.p.raw.class.bin", "peak_gene.p.raw.class")) %>%
4513
+        dplyr::filter(grepl("ratio", variable))
4514
+    
4515
+    
4516
+    list(d = d, d2 = d2, d3 = d3, d4 = d4, d4_melt = d4_melt, d_merged = d_merged)
4517
+    
4518
+}
4519
+
4520
+.classFreq_label <- function(tbl_freq) {
4521
+    paste0(names(tbl_freq), " (", tbl_freq, ")")
4522
+}
4523
+
... ...
@@ -58,7 +58,7 @@
58 58
     if (!checkmate::testNull(pdfFile)) 
59 59
       grDevices::dev.off()
60 60
     
61
-    futile.logger::flog.info(paste0("Finished writing plots to file ", pdfFile))
61
+    futile.logger::flog.info(paste0(" Finished writing plots"))
62 62
 }
63 63
 
64 64
 .startLogger <- function(logfile, level, removeOldLog = TRUE, appenderName = "consoleAndFile", verbose = TRUE) {
... ...
@@ -719,3 +719,23 @@ is.installed <- function(mypkg){
719 719
     .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
720 720
   }
721 721
 }
722
+
723
+
724
+.getBiomartParameters <- function(genomeAssembly) {
725
+    
726
+    .checkOntologyPackageInstallation("biomaRt")
727
+    
728
+    if (grepl(x = genomeAssembly, pattern = "^hg\\d\\d")){
729
+        dataset = "hsapiens_gene_ensembl"
730
+        if (genomeAssembly == "hg38") {
731
+            host = "https://www.ensembl.org"
732
+        } else if (genomeAssembly == "hg19") {
733
+            host ="https://grch37.ensembl.org"
734
+        }
735
+    } else if (grep(x = genomeAssembly, pattern = "^mm\\d\\d")){
736
+        dataset = "mmusculus_gene_ensembl"
737
+        host = "https://www.ensembl.org"
738
+    }
739
+    
740
+    list(dataset = dataset, host = host)
741
+}
722 742
\ No newline at end of file
... ...
@@ -483,17 +483,13 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"),
483 483
     
484 484
     .checkOntologyPackageInstallation("biomaRt")
485 485
     
486
-    if (grep(x = GRN@config$parameters$genomeAssembly, pattern = "^hg\\d\\d" )){
487
-      dataset = "hsapiens_gene_ensembl"
488
-    } else if (grep(x = GRN@config$parameters$genomeAssembly, pattern = "^mm\\d\\d")){
489
-      dataset = "mmusculus_gene_ensembl"
490
-    }
486
+    params.l = .getBiomartParameters(GRN@config$parameters$genomeAssembly)
491 487
     
492 488
     errorOcured = FALSE
493 489
     
494 490
     ensembl = tryCatch({ 
495
-      biomaRt::useEnsembl(biomart = "genes", dataset = dataset)
496
-      
491
+        biomaRt::useEnsembl(biomart = "genes", host = params.l[["host"]], dataset = params.l[["dataset"]])
492
+        
497 493
     }, error = function(e) {
498 494
       errorOcured = TRUE 
499 495
     }
... ...
@@ -1019,7 +1015,7 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network =
1019 1015
 #' 
1020 1016
 #' @inheritParams calculateGeneralEnrichment
1021 1017
 #' @param rankType Character. Default \code{"degree"}. One of: \code{"degree"}, \code{"EV"}, \code{"custom"}. This parameter will determine the criterion to be used to identify the "top" TFs. If set to "degree", the function will select top TFs based on the number of connections to genes they have, i.e. based on their degree-centrality. If set to \code{"EV"} it will select the top TFs based on their eigenvector-centrality score in the network. If set to custom, a set of TF names will have to be passed to the "TF.names" parameter.
1022
-#' @param n Numeric. Default 0.1. If this parameter is passed as a value between (0,1), it is treated as a percentage of top nodes. If the value is passed as an integer it will be treated as the number of top nodes. This parameter is not relevant if \code{rankType = "custom"}.
1018
+#' @param n Numeric. Default 3. If this parameter is passed as a value between 0 and 1, it is treated as a percentage of top nodes. If the value is passed as an integer it will be treated as the number of top nodes. This parameter is not relevant if \code{rankType = "custom"}.
1023 1019
 #' @param TF.names Character vector. Default \code{NULL}. If the rank type is set to \code{"custom"}, a vector of TF names for which the GO enrichment should be calculated should be passed to this parameter.
1024 1020
 #' @return The same \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$byTF} slot.
1025 1021
 #' @examples 
... ...
@@ -1027,7 +1023,7 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network =
1027 1023
 #' GRN =  loadExampleObject()
1028 1024
 #' GRN =  calculateTFEnrichment(GRN, n = 5, ontology = "GO_BP", forceRerun = FALSE)
1029 1025
 #' @export
1030
-calculateTFEnrichment <- function(GRN, rankType = "degree", n = 0.1, TF.names = NULL,
1026
+calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NULL,
1031 1027
                                   ontology = c("GO_BP", "GO_MF"), algorithm = "weight01", 
1032 1028
                                   statistic = "fisher", background = "neighborhood",
1033 1029
                                   pAdjustMethod = "BH",
... ...
@@ -6,13 +6,15 @@
6 6
 #'
7 7
 #' @template GRN 
8 8
 #' @template outputFolder
9
-#' @param type Character. Either "peaks" or "rna" or "all". Type of data to plot a PCA for. "peaks" corresponds to the the open chromatin data, while "rna" refers to the RNA-seq counts. If set to "all", PCA will be done for both data modalities. In any case, PCA will be based on the original provided data before any additional normalization has been run (i.e., usually the raw data).
10
-#' @param topn Integer vector. Default c(500,1000,5000). Number of top variable features to do PCA for. Can be a vector of different numbers (see default).
9
+#' @param data Character. Either \code{"peaks"} or \code{"rna"} or \code{"all"}. Type of data to plot a PCA for. \code{"peaks"} corresponds to the the open chromatin data, while \code{"rna"} refers to the RNA-seq counts. If set to \code{"all"}, PCA will be done for both data modalities. In any case, PCA will be based on the original provided data before any additional normalization has been run (i.e., usually the raw data).
10
+#' @param topn Integer vector. Default \code{c(500,1000,5000)}. Number of top variable features to do PCA for. Can be a vector of different numbers (see default).
11
+#' @param type Character. One of or a combination of \code{"raw"}, \code{"normalized"}, \code{"all"}. Default \code{c("raw", "normalized")}. Should the PCA plots be done based on the raw or normalized data, respectively?
11 12
 #' @template forceRerun
12 13
 #' @template basenameOutput
13 14
 #' @template plotAsPDF
14 15
 #' @template pdf_width
15 16
 #' @template pdf_height
17
+#' @template pages
16 18
 #' @return The same \code{\linkS4class{GRN}} object, without modifications. In addition, for each specified \code{type}, a PDF file is produced with a PCA. We refer to the Vignettes for details and further explanations.
17 19
 #' @examples 
18 20
 #' # See the Workflow vignette on the GRaNIE website for examples
... ...
@@ -20,17 +22,19 @@
20 22
 #' # GRN = plotPCA_all(GRN, topn = 500, type = "rna", plotAsPDF = FALSE)
21 23
 #' @export
22 24
 plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, 
23
-                        type = c("rna", "peaks"), topn = c(500,1000,5000), 
24
-                        plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12,
25
+                        data = c("rna", "peaks"), topn = c(500,1000,5000), type = c("raw", "normalized"), 
26
+                        plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, pages = NULL,
25 27
                         forceRerun = FALSE) {
26 28
   
27 29
   GRN = .addFunctionLogToObject(GRN)
28 30
   
29 31
   checkmate::assertClass(GRN, "GRN")
30
-  checkmate::assertSubset(type , c("rna", "peaks", "all"))
32
+  checkmate::assertSubset(data , c("rna", "peaks", "all"))
33
+  checkmate::assertSubset(type , c("raw", "normalized", "all"))
31 34
   checkmate::assertFlag(plotAsPDF)
32 35
   checkmate::assertNumeric(pdf_width, lower = 5, upper = 99)
33 36
   checkmate::assertNumeric(pdf_height, lower = 5, upper = 99)
37
+  checkmate::assert(checkmate::checkNull(pages), checkmate::checkIntegerish(pages, lower = 1))
34 38
   checkmate::assertFlag(forceRerun)
35 39
   checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE))
36 40
   
... ...
@@ -43,7 +47,10 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
43 47
   
44 48
   uniqueSamples = GRN@config$sharedSamples
45 49
   
46
-  if ("rna" %in% type | "all" %in% type) {
50
+  # Initialize the page counter
51
+  pageCounter = 1 
52
+  
53
+  if ("rna" %in% data | "all" %in% data) {
47 54
     
48 55
     # Check if we have raw integer counts. Ust vst transform if, otherwise "regular log2
49 56
     transformation = dplyr::if_else(checkmate::testClass(GRN@data$RNA$counts_orig, "DESeqDataSet"), "vst", "log2")
... ...
@@ -55,6 +62,10 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
55 62
     }
56 63
     
57 64
     for (norm in normVector) {
65
+        
66
+        if (!(norm %in% type | "all" %in% type)) {
67
+            next
68
+        }
58 69
       
59 70
       # Only do a transformation if this is raw counts, already normalized counts dont need another transformation
60 71
       transformationCur = dplyr::if_else(norm == "raw", transformation, "none")
... ...
@@ -85,7 +96,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
85 96
                                         " RNA data for all shared samples to file ", fileCur , 
86 97
                                         "... This may take a few minutes"))
87 98
         
88
-        .plot_PCA_wrapper(GRN, matrixCur, transformation = transformationCur, logTransformDensity = logTransformDensity, file = fileCur, topn = topn, pdf_width = pdf_width, pdf_height = pdf_height, plotAsPDF = plotAsPDF)
99
+        .plot_PCA_wrapper(GRN, matrixCur, transformation = transformationCur, logTransformDensity = logTransformDensity, file = fileCur, topn = topn, pdf_width = pdf_width, pdf_height = pdf_height, pages = pages, plotAsPDF = plotAsPDF)
89 100
         
90 101
       } else {
91 102
         futile.logger::flog.info(paste0("File ", fileCur, " already exists, not overwriting due to forceRerun = FALSE"))
... ...
@@ -95,7 +106,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
95 106
     
96 107
   } # end if plot RNA PCA
97 108
   
98
-  if ("peaks" %in% type | "all" %in% type) {
109
+  if ("peaks" %in% data | "all" %in% data) {
99 110
     
100 111
     # Check if we have raw integer counts
101 112
     transformation = dplyr::if_else(checkmate::testClass(GRN@data$peaks$counts_orig, "DESeqDataSet"), "vst", "log2")
... ...
@@ -131,7 +142,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
131 142
                                         " peaks data for all shared samples to file ", fileCur , 
132 143
                                         "... This may take a few minutes"))
133 144
         
134
-        .plot_PCA_wrapper(GRN, matrixCur, transformation = transformationCur, logTransformDensity = logTransformDensity, file = fileCur, topn = topn, pdf_width = pdf_width, pdf_height = pdf_height, plotAsPDF = plotAsPDF)
145
+        .plot_PCA_wrapper(GRN, matrixCur, transformation = transformationCur, logTransformDensity = logTransformDensity, file = fileCur, topn = topn, pdf_width = pdf_width, pdf_height = pdf_height, pages = pages, plotAsPDF = plotAsPDF)
135 146
         
136 147
       } else {
137 148
         futile.logger::flog.info(paste0("File ", fileCur, " already exists, not overwriting due to forceRerun = FALSE"))
... ...
@@ -149,7 +160,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
149 160
 }
150 161
 
151 162
 
152
-.plot_PCA_wrapper <- function(GRN, counts, transformation = "vst", scale = TRUE, logTransformDensity, file, topn, pdf_width, pdf_height, plotAsPDF) {
163
+.plot_PCA_wrapper <- function(GRN, counts, transformation = "vst", scale = TRUE, logTransformDensity, file, topn, pdf_width, pdf_height, pages = NULL, plotAsPDF) {
153 164
   
154 165
   checkmate::assertChoice(transformation, c("vst", "log2", "none"))
155 166
   checkmate::assertIntegerish(topn, lower = 50, any.missing = FALSE,min.len = 1)
... ...
@@ -175,7 +186,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
175 186
   
176 187
   futile.logger::flog.info(paste0("Prepare PCA. Count transformation: ", transformation))
177 188
   
178
-  
189
+  pageCounter = 1 
179 190
   if (plotAsPDF) {
180 191
     .checkOutputFile(file)
181 192
     grDevices::pdf(file, width = pdf_width, height = pdf_height)
... ...
@@ -188,7 +199,11 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
188 199
   rv <- matrixStats::rowVars(counts.transf)
189 200
   
190 201
   # 0. Density plot for ALL features
191
-  .plotDensity(counts.transf, logTransform = logTransformDensity, legend = dplyr::if_else(ncol(counts.transf) > 20, FALSE, TRUE))
202
+  if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
203
+      .plotDensity(counts.transf, logTransform = logTransformDensity, legend = dplyr::if_else(ncol(counts.transf) > 20, FALSE, TRUE))
204
+      
205
+  }
206
+  pageCounter = pageCounter + 1
192 207
   
193 208
   for (topn in topn) {
194 209
     
... ...
@@ -209,95 +224,105 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
209 224
                     pos = seq_len(nPCS))
210 225
     
211 226
     # 1. Scree plot
212
-    g = ggplot(screeplot.df, aes(PCs, variation)) + geom_bar(stat = "identity") + 
213
-      xlab("Principal component (PC)") + ylab("Explained variation / Cumultative sum (in %)") + 
214
-      geom_point(data = screeplot.df, aes(pos, variation_sum), color = "red") + 
215
-      geom_line(data = screeplot.df, aes(pos, variation_sum), color = "red") +
216
-      ggtitle(paste0("Screeplot for top ", topn, " variable features")) + 
217
-      theme_bw()
218
-    
219
-    plot(g)
220
-    
227
+    if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
228
+        
229
+        g = ggplot(screeplot.df, aes(PCs, variation)) + geom_bar(stat = "identity") + 
230
+            xlab("Principal component (PC)") + ylab("Explained variation / Cumultative sum (in %)") + 
231
+            geom_point(data = screeplot.df, aes(pos, variation_sum), color = "red") + 
232
+            geom_line(data = screeplot.df, aes(pos, variation_sum), color = "red") +
233
+            ggtitle(paste0("Screeplot for top ", topn, " variable features")) + 
234
+            theme_bw()
235
+        
236
+        plot(g)
237
+    }
238
+    pageCounter = pageCounter + 1
221 239
     
222 240
     # 2. Metadata correlation with PCs
223
-    metadata_columns = .findSuitableColumnsToPlot(metadata, remove1LevelFactors = TRUE, verbose = FALSE)
224
-    .plotPCA_QC(pcaResult = pca, dataCols = colnames(counts.transf), metadataTable = metadata, metadataColumns = metadata_columns, 
225
-                varn = topn, metadataColumns_fill = metadata_columns, 
226
-                file = NULL, logTransform = FALSE)
227
-    
241
+    if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
242
+        metadata_columns = .findSuitableColumnsToPlot(metadata, remove1LevelFactors = TRUE, verbose = FALSE)
243
+        .plotPCA_QC(pcaResult = pca, dataCols = colnames(counts.transf), metadataTable = metadata, metadataColumns = metadata_columns, 
244
+                    varn = topn, metadataColumns_fill = metadata_columns, 
245
+                    file = NULL, logTransform = FALSE)
246
+    }
247
+    pageCounter = pageCounter + 1
228 248
     
229 249
     
230 250
     # 3. Correlation plots colored by metadata
231 251
     metadata_columns = .findSuitableColumnsToPlot(metadata, remove1LevelFactors = FALSE, verbose = FALSE)
232 252
     for (varCur in metadata_columns) {
233 253
       
234
-      plotTitle = paste0("Top ", topn, " variable features, colored by \n", varCur)
235
-      
236
-      skipLegend = FALSE
237
-      vecCur = metadata[, varCur] %>%unlist()
238
-      if (is.character(vecCur) & length(unique(vecCur)) > 30) {
239
-        skipLegend = TRUE
240
-        plotTitle = paste0(plotTitle, " (legend skipped)")
241
-      }
242
-      
243
-      result = tryCatch({
244
-        
245
-        # The following code is taken from plotPCA from DESeq2 and adapted here to be more flexible
246
-        intgroup.df <- as.data.frame(metadata[, varCur, drop=FALSE])
247
-        
248
-        # add the varCur factors together to create a new grouping factor
249
-        group = metadata[[varCur]]
250
-        
251
-        # assembly the data for the plot
252
-        d <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], group=group, intgroup.df, name=colnames(counts.transf))
253
-        
254
-        d = d %>%
255
-          dplyr::mutate_if(is.character, as.factor) %>%
256
-          dplyr::mutate_if(is.logical, as.factor)
257
-        
258
-        
259
-        g = g = ggplot(data=d, aes(x=PC1, y=PC2, color=group)) + geom_point(size=3) + 
260
-          xlab(paste0("PC1: ",round(percentVar[1] * 100, 1),"% variance")) +
261
-          ylab(paste0("PC2: ",round(percentVar[2] * 100, 1),"% variance")) +
262
-          ggtitle(plotTitle) + coord_fixed()
263
-        
264
-        
265
-        
266
-        if (is.factor(d$group)) {
267
-          g = g + scale_color_viridis_d()
268
-          
269
-        } else {
270
-          
271
-          is.date <- function(x) inherits(x, 'Date')
272
-          
273
-          if (!is.date(d$group)) {
274
-            g = g + scale_color_viridis_c()
275
-          } else {
276
-            g = g + scale_color_viridis_c(trans = "date")
277
-          }
278
-          
279
-          
254
+        if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
255
+            
256
+            plotTitle = paste0("Top ", topn, " variable features, colored by \n", varCur)
257
+            
258
+            skipLegend = FALSE
259
+            vecCur = metadata[, varCur] %>%unlist()
260
+            if (is.character(vecCur) & length(unique(vecCur)) > 30) {
261
+                skipLegend = TRUE
262
+                plotTitle = paste0(plotTitle, " (legend skipped)")
263
+            }
264
+            
265
+            result = tryCatch({
266
+                
267
+                # The following code is taken from plotPCA from DESeq2 and adapted here to be more flexible
268
+                intgroup.df <- as.data.frame(metadata[, varCur, drop=FALSE])
269
+                
270
+                # add the varCur factors together to create a new grouping factor
271
+                group = metadata[[varCur]]
272
+                
273
+                # assembly the data for the plot
274
+                d <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], group=group, intgroup.df, name=colnames(counts.transf))
275
+                
276
+                d = d %>%
277
+                    dplyr::mutate_if(is.character, as.factor) %>%
278
+                    dplyr::mutate_if(is.logical, as.factor)
279
+                
280
+                
281
+                g = g = ggplot(data=d, aes(x=PC1, y=PC2, color=group)) + geom_point(size=3) + 
282
+                    xlab(paste0("PC1: ",round(percentVar[1] * 100, 1),"% variance")) +
283
+                    ylab(paste0("PC2: ",round(percentVar[2] * 100, 1),"% variance")) +
284
+                    ggtitle(plotTitle) + coord_fixed()
285
+                
286
+                
287
+                
288
+                if (is.factor(d$group)) {
289
+                    g = g + scale_color_viridis_d()
290
+                    
291
+                } else {
292
+                    
293
+                    is.date <- function(x) inherits(x, 'Date')
294
+                    
295
+                    if (!is.date(d$group)) {
296
+                        g = g + scale_color_viridis_c()
297
+                    } else {
298
+                        g = g + scale_color_viridis_c(trans = "date")
299
+                    }
300
+                    
301
+                    
302
+                }
303
+                
304
+                
305
+                if (skipLegend) {
306
+                    g = g + theme(legend.position = "none")
307
+                } 
308
+                
309
+                plot(g)
310
+                
311
+            }, error = function(e) {
312
+                futile.logger::flog.warn(paste0(" Could not plot PCA with variable ", varCur))
313
+                plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
314
+                message = paste0(" Could not plot PCA with variable ", varCur)
315
+                text(x = 0.5, y = 0.5, message, cex = 1.6, col = "red")
316
+            })
280 317
         }
281
-        
282
-        
283
-        if (skipLegend) {
284
-          g = g + theme(legend.position = "none")
285
-        } 
286
-        
287
-        plot(g)
288
-        
289
-      }, error = function(e) {
290
-        futile.logger::flog.warn(paste0(" Could not plot PCA with variable ", varCur))
291
-        plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
292
-        message = paste0(" Could not plot PCA with variable ", varCur)
293
-        text(x = 0.5, y = 0.5, message, cex = 1.6, col = "red")
294
-      })
318
+        pageCounter = pageCounter + 1
295 319
       
296 320
       
297
-    }
321
+    } # end for each variable
298 322
     
299 323
   }
300 324
   
325
+  .printWarningPageNumber(pages, pageCounter)
301 326
   if (plotAsPDF) grDevices::dev.off()
302 327
   
303 328
 }
... ...
@@ -426,7 +451,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
426 451
     cell_fun = function(j, i, x, y, width, height, fill) {
427 452
       grid::grid.text(sprintf("%.2f", r2_cv[i, j]), x, y, gp = grid::gpar(fontsize = 10))
428 453
     }
429
-  )
454
+  ) %>% plot()
430 455
   
431 456
   
432 457
   
... ...
@@ -450,8 +475,12 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
450 475
 #' @template outputFolder
451 476
 #' @template basenameOutput
452 477
 #' @template plotDetails
453
-#' @param plotPermuted \code{TRUE} or \code{FALSE}. Default  \code{TRUE}. Also produce the diagnostic plots for permuted data?
478
+#' @param dataType Character vector. One of, or both of, \code{"real"} or \code{"permuted"}. For which data type, real or permuted data, to produce the diagnostic plots?
454 479
 #' @param nTFMax \code{NULL} or Integer. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10)
480
+#' @template plotAsPDF
481
+#' @template pdf_width
482
+#' @param pdf_height_base  Number. Default 8. Base height of the PDF, in cm, per connection type. The total height is automatically determined based on the number of connection types that are found in the object (e.g., expression or TF activity). For example, when two connection types are found, the base height is multiplied by 2.
483
+#' @template pages
455 484
 #' @template forceRerun
456 485
 #' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
457 486
 #' @examples 
... ...
@@ -463,8 +492,9 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
463 492
                                         outputFolder = NULL, 
464 493
                                         basenameOutput = NULL, 
465 494
                                         plotDetails = FALSE,
466
-                                        plotPermuted = TRUE,
495
+                                        dataType = c("real", "permuted"),
467 496
                                         nTFMax = NULL,
497
+                                        plotAsPDF = TRUE, pdf_width = 12, pdf_height_base = 8, pages = NULL,
468 498
                                         forceRerun = FALSE) {
469 499
   
470 500
   GRN = .addFunctionLogToObject(GRN)
... ...
@@ -473,8 +503,12 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
473 503
   checkmate::assert(checkmate::checkNull(outputFolder), checkmate::checkCharacter(outputFolder, min.chars = 1))
474 504
   checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE))
475 505
   checkmate::assertFlag(plotDetails)
476
-  checkmate::assertFlag(plotPermuted)
506
+  checkmate::assertSubset(dataType, c("real", "permuted"))
477 507
   checkmate::assert(checkmate::checkNull(nTFMax), checkmate::checkIntegerish(nTFMax))
508
+  checkmate::assertFlag(plotAsPDF)
509
+  checkmate::assertNumeric(pdf_width, lower = 5, upper = 99)
510
+  checkmate::assertNumeric(pdf_height_base, lower = 5, upper = 99)
511
+  checkmate::assert(checkmate::checkNull(pages), checkmate::checkIntegerish(pages, lower = 1))
478 512
   checkmate::assertFlag(forceRerun)
479 513
   
480 514
   useGCCorrection = GRN@config$parameters$useGCCorrection
... ...
@@ -482,29 +516,38 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
482 516
   
483 517
   outputFolder = .checkOutputFolder(GRN, outputFolder)
484 518
   
519
+  dataType2 = c()
520
+  if ("real" %in% dataType) dataType2 = c(0)
521
+  if ("permuted" %in% dataType) dataType2 = c(dataType2, 1)
522
+  
485 523
   for (permutationCur in 0:.getMaxPermutation(GRN)) {
486 524
     
487
-    if (!plotPermuted & permutationCur != 0) {
525
+    if (!permutationCur %in% dataType2) {
488 526
       next
489 527
     }
490 528
     
491 529
     suffixFile = .getPermutationSuffixStr(permutationCur)
492 530
     
493
-    fileCur = paste0(outputFolder, 
494
-                     dplyr::if_else(is.null(basenameOutput), .getOutputFileName("plot_TFPeak_fdr"), basenameOutput), 
531
+    fileCur = paste0(outputFolder, dplyr::if_else(is.null(basenameOutput), .getOutputFileName("plot_TFPeak_fdr"), basenameOutput), 
495 532
                      suffixFile, ".pdf")
496 533
     
497
-    if (!file.exists(fileCur) | forceRerun) {
534
+    
535
+    if (!file.exists(fileCur) | !plotAsPDF | forceRerun) {
536
+        
537
+        if (!plotAsPDF) fileCur = NULL
498 538
       
499
-      heightCur = 8* length(GRN@config$TF_peak_connectionTypes)
500
-      .plot_TF_peak_fdr(GRN, perm = permutationCur, useGCCorrection = useGCCorrection, 
501
-                        plotDetails = plotDetails, fileCur, width = 7, height = heightCur,
502
-                        nPagesMax = nTFMax) 
539
+        heightCur = pdf_height_base * length(GRN@config$TF_peak_connectionTypes)
540
+        .plot_TF_peak_fdr(GRN, perm = permutationCur, useGCCorrection = useGCCorrection, 
541
+                        plotDetails = plotDetails, fileCur, width = pdf_width, height = heightCur,
542
+                        nPagesMax = nTFMax, pages = pages) 
503 543
     }
504 544
     
545
+    # TODO: page selection not implemented here yet
505 546
     fileCur = paste0(outputFolder, .getOutputFileName("plot_TFPeak_fdr_GC"), suffixFile)
506
-    if (useGCCorrection & (!file.exists(fileCur) | forceRerun)) {
507
-      .plotTF_peak_GC_diagnosticPlots(GRN, perm = permutationCur, fileCur, width = 7, height = 8) 
547
+    if (useGCCorrection & (!file.exists(fileCur) | !plotAsPDF | forceRerun)) {
548
+        
549
+        if (!plotAsPDF) fileCur = NULL
550
+        .plotTF_peak_GC_diagnosticPlots(GRN, perm = permutationCur, fileCur, width = pdf_width, height = pdf_height_base) 
508 551
     }
509 552
     
510 553
     # TODO: handle multiple activities and actually write this function
... ...
@@ -519,7 +562,7 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
519 562
   
520 563
 }
521 564
 
522
-
565
+# TODO: Currently not used anywhere
523 566
 .generateTF_GC_diagnosticPlots <- function(TFCur, GC_classes_foreground.df, GC_classes_background.df, GC_classes_all.df, peaksForeground, peaksBackground, peaksBackgroundGC) {
524 567
   
525 568
   GC_classes_background_GC.df = peaksBackgroundGC %>%
... ...
@@ -627,15 +670,15 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
627 670
 }
628 671
 
629 672
 
630
-.plot_TF_peak_fdr <- function(GRN, perm, useGCCorrection, plotDetails = FALSE, file = NULL, width = 7, height = 7, nPagesMax = NULL) {
673
+.plot_TF_peak_fdr <- function(GRN, perm, useGCCorrection, plotDetails = FALSE, file = NULL, width = 7, height = 7, nPagesMax = NULL, pages = NULL) {
631 674
   
632 675
   start = Sys.time()
633 676
   futile.logger::flog.info(paste0("Plotting FDR curves for each TF", dplyr::if_else(is.null(file), "", paste0(" to file ", file))))
634 677
   
635
-  if (!is.null(file)) {
636
-    plots.l = list()
637
-    index = 1
638
-  } 
678
+  pageCounter = 1  
679
+  
680
+  plots.l = list()
681
+  index = 1
639 682
   
640 683
   # Dont take all TF, some might be missing.
641 684
   connections_TF_peak = GRN@connections$TF_peaks[[as.character(perm)]]$connectionStats
... ...
@@ -755,35 +798,42 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
755 798
         
756 799
       }
757 800
       
758
-      
759
-      
760
-      if (is.null(file)) {
761
-        plot(g)
762
-      } else {
763
-        plots.l[[index]] = g
764
-        index = index + 1
765
-        
766
-        if (plotDetails) {
767
-          plots.l[[index]] = g2
801
+      # We now created either just one plot g or g + g2 for "one" direction
802
+      if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
803
+          plots.l[[index]] = g
768 804
           index = index + 1
769
-        }
770
-        
771 805
       }
806
+
772 807
       
808
+      if (plotDetails) {
809
+          
810
+          if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
811
+              plots.l[[index]] = g2
812
+              index = index + 1
813
+          }
814
+
815
+          # Only here we already have a new page, as we produced 2 plots in this iteration alone
816
+          pageCounter = pageCounter + 1 
817
+       }
818
+
773 819
       
820
+    } # end for both directions
821
+    
822
+    if (!plotDetails) {
823
+        pageCounter = pageCounter + 1 
774 824
     }
825
+
826
+    
775 827
   } # end allTF
776 828
   
777
-  if (!is.null(file)) {
778
-    futile.logger::flog.info(paste0(" Finished generating plots, start plotting to file ",file, ". This may take a few minutes."))
779
-    .printMultipleGraphsPerPage(plots.l, nCol = 1, nRow = 2, pdfFile = file, height = height, width = width)
780
-    
781
-  }
829
+  .printWarningPageNumber(pages, pageCounter)
830
+  
831
+  .printMultipleGraphsPerPage(plots.l, nCol = 1, nRow = 2, pdfFile = file, height = height, width = width)
782 832
   
783 833
   .printExecutionTime(start)
784 834
 }
785 835
 
786
-.plotTF_peak_TFActivity_QC <- function(GRN, perm = permIndex, fileCur, width = 7, height = 8) {
836
+.plotTF_peak_TFActivity_QC <- function(GRN, perm = 0, fileCur, width = 7, height = 8) {
787 837
   
788 838
   # TODO
789 839
 }
... ...
@@ -796,13 +846,14 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
796 846
 #' @template GRN 
797 847
 #' @template outputFolder
798 848
 #' @template basenameOutput
799
-#' @param gene.types List of character vectors. Default list(c("all"), c("protein_coding"), c("protein_coding", "lincRNA")). Vectors of gene types to consider for the diagnostic plots. Multiple distinct combinations of gene types can be specified. For example, if set to \code{list(c("protein_coding", "lincRNA"), c("protein_coding"), c("all"))}, 3 distinct PDFs will be produced, one for each element of the list. The first file would only consider protein-coding and lincRNA genes, while the second plot only considers protein-coding ones. The special keyword "all" denotes all gene types found (usually, there are many gene types present, also more exotic and rare ones).
849
+#' @param gene.types List of character vectors. Default list(c("protein_coding", "lincRNA")). Vectors of gene types to consider for the diagnostic plots. Multiple distinct combinations of gene types can be specified. For example, if set to \code{list(c("protein_coding", "lincRNA"), c("protein_coding"), c("all"))}, 3 distinct PDFs will be produced, one for each element of the list. The first file would only consider protein-coding and lincRNA genes, while the second plot only considers protein-coding ones. The special keyword "all" denotes all gene types found (usually, there are many gene types present, also more exotic and rare ones).
800 850
 #' @param useFiltered Logical. TRUE or FALSE. Default FALSE. If set to \code{FALSE}, the diagnostic plots will be produced based on all peak-gene connections. This is the default and will usually be best to judge whether the background behaves as expected. If set to TRUE, the diagnostic plots will be produced based on the filtered set of connections. For this, the function \code{link{filterGRNAndConnectGenes}} must have been run before.
801 851
 #' @template plotDetails
802 852
 #' @param plotPerTF Logical. TRUE or FALSE. Default \code{FALSE}. If set to \code{FALSE}, the diagnostic plots will be done across all TF (the default), while setting it to \code{TRUE} will generate the QC plots TF-specifically, including "all" TF, sorted by the number of connections.
803 853
 #' @template plotAsPDF
804 854
 #' @template pdf_width
805 855
 #' @template pdf_height
856
+#' @template pages
806 857
 #' @template forceRerun
807 858
 #' @return The same \code{\linkS4class{GRN}} object, with added data from this function.
808 859
 #' @examples 
... ...
@@ -815,11 +866,11 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
815 866
 plotDiagnosticPlots_peakGene <- function(GRN, 
816 867
                                          outputFolder = NULL, 
817 868
                                          basenameOutput = NULL, 
818
-                                         gene.types = list(c("all"), c("protein_coding"), c("protein_coding", "lincRNA")), 
869
+                                         gene.types = list(c("protein_coding", "lincRNA")), 
819 870
                                          useFiltered = FALSE, 
820 871
                                          plotDetails = FALSE,
821 872
                                          plotPerTF = FALSE,
822
-                                         plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12,
873
+                                         plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, pages = NULL,
823 874
                                          forceRerun= FALSE) {
824 875
   
825 876
   GRN = .addFunctionLogToObject(GRN)
... ...
@@ -834,6 +885,7 @@ plotDiagnosticPlots_peakGene <- function(GRN,
834 885
   checkmate::assertFlag(plotAsPDF)
835 886
   checkmate::assertNumeric(pdf_width, lower = 5, upper = 99)
836 887
   checkmate::assertNumeric(pdf_height, lower = 5, upper = 99)
888
+  checkmate::assert(checkmate::checkNull(pages), checkmate::checkIntegerish(pages, lower = 1))
837 889
   checkmate::assertFlag(forceRerun)
838 890
   
839 891
   
... ...
@@ -864,20 +916,9 @@ plotDiagnosticPlots_peakGene <- function(GRN,
864 916
                                     plotPerTF = plotPerTF,
865 917
                                     pdf_width = pdf_width, pdf_height = pdf_height,
866 918
                                     plotDetails = plotDetails,
919
+                                    pages = pages,
867 920
                                     forceRerun = forceRerun)
868 921
   
869
-  # Summarize all data, both real and random
870
-  # filteredStr = dplyr::if_else(useFiltered, "_filtered", "")
871
-  # for (geneTypesSelected in gene.types) {
872
-  #   
873
-  #   filenameCur = paste0(outputFolder, .getOutputFileName("plot_peakGene_diag"), "_", paste0(geneTypesSelected, collapse = "+"), filteredStr, ".pdf")
874
-  #   if (!file.exists(filenameCur) | forceRerun) {
875
-  #    
876
-  #   } else {
877
-  #     futile.logger::flog.info(paste0("File ", filenameCur, " already exists. As forceRerun = FALSE, not overwriting it."))
878
-  #   }
879
-  #   
880
-  # }
881 922
   
882 923
   GRN
883 924
   
... ...
@@ -891,6 +932,7 @@ plotDiagnosticPlots_peakGene <- function(GRN,
891 932
                                               fileBase = NULL,
892 933
                                               pdf_width = 12,
893 934
                                               pdf_height = 12,
935
+                                              pages = NULL,
894 936
                                               forceRerun = FALSE) {
895 937
   
896 938
   start = Sys.time()
... ...
@@ -1005,8 +1047,7 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1005 1047
     freq_class = gsub(freq_class, pattern = "random", replacement = "permuted")
1006 1048
     names(freq_class) <- names(freqs)
1007 1049
     
1008
-    
1009
-    
1050
+
1010 1051
     xlabels_peakGene_r.class = levels(peakGeneCorrelations.all$peak_gene.r.class)
1011 1052
     nCur = length(xlabels_peakGene_r.class)
1012 1053
     xlabels_peakGene_r.class[setdiff(seq_len(nCur), c(1, floor(nCur/2), nCur))] <- ""
... ...
@@ -1020,9 +1061,14 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1020 1061
     nCur = length(xlabels_peakGene_praw.class)
1021 1062
     xlabels_peakGene_praw.class[setdiff(seq_len(nCur), c(1, floor(nCur/2), nCur))] <- ""
1022 1063
     
1023
-    
1064
+    #
1065
+    # ITERATE THROUGH ALL GENE TYPES, WITH ONE PER PLOT
1066
+    #
1024 1067
     for (geneTypesSelected in gene.types) {
1025 1068
       
1069
+      # Reset page counter for each PDF anew
1070
+      pageCounter = 1
1071
+        
1026 1072
       futile.logger::flog.info(paste0(" Gene type ", paste0(geneTypesSelected, collapse = "+")))
1027 1073
       
1028 1074
       if ("all" %in% geneTypesSelected) {
... ...
@@ -1064,116 +1110,120 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1064 1110
         
1065 1111
         counter = counter + 1
1066 1112
         
1067
-        if (length(allTF) > 1) {
1068
-          futile.logger::flog.info(paste0(" QC plots for TF ", TFCur, " (", counter, " of ", length(allTF), ")"))
1069
-        }
1070
-        
1071
-        
1072
-        if ("all" %in% geneTypesSelected) {
1073
-          indexCur = seq_len(nrow(peakGeneCorrelations.all))
1074
-        } else {
1075
-          indexCur = which(peakGeneCorrelations.all$gene.type %in% geneTypesSelected)
1076
-        }
1077
-        
1078
-        
1079
-        if (TFCur != "all") {
1080
-          indexCur = intersect(indexCur, which(peakGeneCorrelations.all$peak.ID %in% TF.peaks[[TFCur]]))
1113
+        if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1114
+            
1115
+            if (length(allTF) > 1) {
1116
+                futile.logger::flog.info(paste0(" QC plots for TF ", TFCur, " (", counter, " of ", length(allTF), ")"))
1117
+            }
1118
+            
1119
+            
1120
+            if ("all" %in% geneTypesSelected) {
1121
+                indexCur = seq_len(nrow(peakGeneCorrelations.all))
1122
+            } else {
1123
+                indexCur = which(peakGeneCorrelations.all$gene.type %in% geneTypesSelected)
1124
+            }
1125
+            
1126
+            
1127
+            if (TFCur != "all") {
1128
+                indexCur = intersect(indexCur, which(peakGeneCorrelations.all$peak.ID %in% TF.peaks[[TFCur]]))
1129
+            }
1130
+            
1131
+            # Get subset also for just the real data
1132
+            indexCurReal = intersect(indexCur, which(peakGeneCorrelations.all$class == names(dist_class)[1]))
1133
+            
1134
+            
1135
+            xlabel = paste0("Correlation raw p-value")
1136
+            
1137
+            # DENSITY PLOTS P VALUE
1138
+            
1139
+            # TODO: Densities as ratio
1140
+            # https://stackoverflow.com/questions/58629959/how-can-i-extract-data-from-a-kernel-density-function-in-r-for-many-samples-at-o#:~:text=To%20compute%20the%20density%20you,can%20use%20the%20package%20spatstat%20.
1141
+            
1142
+            # Produce the labels for the class-specific subtitles
1143
+            customLabel_class = .customLabeler(table(peakGeneCorrelations.all[indexCur,]$class))
1144
+            
1145
+            r_pos_freq = table(peakGeneCorrelations.all[indexCur,]$r_positive)
1146
+            labeler_r_pos = labeller(r_positive = c("TRUE"  = paste0("r positive (", r_pos_freq["TRUE"], ")"), 
1147
+                                                    "FALSE" = paste0("r negative (", r_pos_freq["FALSE"], ")")) )
1148
+            theme_main = theme(axis.text.x = element_text(angle = 0, hjust = 0.5, size = rel(0.8)),
1149
+                               axis.text.y = element_text(size=rel(0.8)),
1150
+                               panel.grid.major = element_blank(), panel.grid.minor = element_blank())
1151
+            
1152
+            
1153
+            
1154
+            ## p-val density curves stratified by real/permuted ##
1155
+            
1156
+            gA2 = ggplot(peakGeneCorrelations.all[indexCur,], aes(peak_gene.p_raw, color = r_positive)) + geom_density()  +
1157
+                facet_wrap(~ class, labeller = labeller(class=freq_class) ) +
1158
+                xlab(xlabel) + ylab("Density") +  theme_bw() +
1159
+                scale_color_manual(labels = names(r_pos_class), values = r_pos_class) +
1160
+                theme(legend.position = "none", axis.text=element_text(size=rel(0.6)), strip.text.x = element_text(size = rel(0.8)))
1161
+            
1162
+            # Helper function to retrieve all tables and data aggregation steps for subsequent visualization
1163
+            tbl.l = .createTables_peakGeneQC(peakGeneCorrelations.all[indexCur,], networkType_details, colors_vec, range)
1164
+            
1165
+            
1166
+            xlabel = paste0("Correlation raw\np-value (binned)")
1167
+            
1168
+            
1169
+            xlabels = levels(tbl.l$d_merged$peak_gene.p.raw.class)
1170
+            xlabels[setdiff(seq_len(length(xlabels)), c(1, floor(length(xlabels)/2), length(xlabels)))] <- ""
1171
+            
1172
+            gB3 = ggplot(tbl.l$d_merged, aes(peak_gene.p.raw.class, ratio, fill = classAll)) + 
1173
+                geom_bar(stat = "identity", position="dodge", na.rm = TRUE, width = 0.5) + 
1174
+                geom_hline(yintercept = 1, linetype = "dotted") + 
1175
+                xlab(xlabel) + ylab("Ratio") +
1176
+                scale_fill_manual("Class", values = c(dist_class, r_pos_class), 
1177
+                                  labels = c("real", "permuted", "r+ (r>0)", "r- (r<=0)"), 
1178
+                ) + # labels vector can be kind of manually specified here because the levels were previosly defined in a certain order
1179
+                scale_x_discrete(labels = xlabels_peakGene_praw.class) +
1180
+                theme_bw() +  
1181
+                #theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8), strip.background = element_blank(), strip.placement = "outside", axis.title.y = element_blank()) +
1182
+                # theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8) , axis.title.y = element_blank()) +
1183
+                theme_main +
1184
+                facet_wrap(~ factor(set), nrow = 2, scales = "free_y", strip.position = "left") 
1185
+            
1186
+            #  plot two FDR plots as well: (fraction of negative / negative+positive) and (fraction of permuted / permuted + real)
1187
+            # that could give an indication of whether an FDR based on the permuted or based on the negative correlations would be more stringent
1188
+            
1189
+            # R PEAK GENE #
1190
+            
1191
+            xlabel = paste0("Correlation coefficient r")
1192
+            
1193
+            sum_real = table(peakGeneCorrelations.all[indexCur,]$class)[names(dist_class)[1]]
1194
+            sum_rnd  = table(peakGeneCorrelations.all[indexCur,]$class)[names(dist_class)[2]]
1195
+            binData.r = peakGeneCorrelations.all[indexCur,] %>%
1196
+                dplyr::group_by(class) %>%
1197
+                dplyr::count(peak_gene.r.class) %>%
1198
+                dplyr::mutate(nnorm = dplyr::case_when(class == !! (names(dist_class)[1]) ~ .data$n / (sum_real / sum_rnd), 
1199
+                                                       TRUE ~ as.double(.data$n)))
1200
+            
1201
+            xlabel = paste0("Correlation coefficient r (binned)")
1202
+            
1203
+            gD = ggplot(binData.r, aes(peak_gene.r.class, nnorm, group = class, fill = class)) + 
1204
+                geom_bar(stat = "identity", position = position_dodge(preserve = "single"), na.rm = FALSE, width = 0.5) +
1205
+                geom_line(aes(peak_gene.r.class, nnorm, group = class, color= class), stat = "identity") +
1206
+                scale_fill_manual("Group", labels = names(dist_class), values = dist_class) +
1207
+                scale_color_manual("Group", labels = names(dist_class), values = dist_class) +
1208
+                scale_x_discrete(labels = xlabels_peakGene_r.class2, drop = FALSE) +
1209
+                theme_bw() + theme(legend.position = "none") +
1210
+                xlab(xlabel) + ylab("Abundance") +
1211
+                theme_main  +
1212
+                scale_y_continuous(labels = scales::scientific)
1213
+            
1214
+            
1215
+            mainTitle = paste0("Summary QC (TF: ", TFCur, ", gene type: ", paste0(geneTypesSelected, collapse = "+"), ",\n", .prettyNum(range), " bp promoter range)")
1216
+            
1217
+            plots_all = ( ((gA2 | gB3 ) + 
1218
+                               patchwork::plot_layout(widths = c(2.5,1.5))) / ((gD) + 
1219
+                                                                                   patchwork::plot_layout(widths = c(4))) ) + 
1220
+                patchwork::plot_layout(heights = c(2,1), guides = 'collect') +
1221
+                patchwork::plot_annotation(title = mainTitle, theme = theme(plot.title = element_text(hjust = 0.5)))
1222
+            
1223
+            plot(plots_all)
1081 1224
         }
1082
-        
1083
-        # Get subset also for just the real data
1084
-        indexCurReal = intersect(indexCur, which(peakGeneCorrelations.all$class == names(dist_class)[1]))
1085
-        
1086
-        
1087
-        xlabel = paste0("Correlation raw p-value")
1088
-        
1089
-        # DENSITY PLOTS P VALUE
1090
-        
1091
-        # TODO: Densities as ratio
1092
-        # https://stackoverflow.com/questions/58629959/how-can-i-extract-data-from-a-kernel-density-function-in-r-for-many-samples-at-o#:~:text=To%20compute%20the%20density%20you,can%20use%20the%20package%20spatstat%20.
1093
-        
1094
-        # Produce the labels for the class-specific subtitles
1095
-        customLabel_class = .customLabeler(table(peakGeneCorrelations.all[indexCur,]$class))
1096
-        
1097
-        r_pos_freq = table(peakGeneCorrelations.all[indexCur,]$r_positive)
1098
-        labeler_r_pos = labeller(r_positive = c("TRUE"  = paste0("r positive (", r_pos_freq["TRUE"], ")"), 
1099
-                                                "FALSE" = paste0("r negative (", r_pos_freq["FALSE"], ")")) )
1100
-        theme_main = theme(axis.text.x = element_text(angle = 0, hjust = 0.5, size = rel(0.8)),
1101
-                           axis.text.y = element_text(size=rel(0.8)),
1102
-                           panel.grid.major = element_blank(), panel.grid.minor = element_blank())
1103
-        
1104
-        
1105
-        
1106
-        ## p-val density curves stratified by real/permuted ##
1107
-        
1108
-        gA2 = ggplot(peakGeneCorrelations.all[indexCur,], aes(peak_gene.p_raw, color = r_positive)) + geom_density()  +
1109
-          facet_wrap(~ class, labeller = labeller(class=freq_class) ) +
1110
-          xlab(xlabel) + ylab("Density") +  theme_bw() +
1111
-          scale_color_manual(labels = names(r_pos_class), values = r_pos_class) +
1112
-          theme(legend.position = "none", axis.text=element_text(size=rel(0.6)), strip.text.x = element_text(size = rel(0.8)))
1113
-        
1114
-        # Helper function to retrieve all tables and data aggregation steps for subsequent visualization
1115
-        tbl.l = .createTables_peakGeneQC(peakGeneCorrelations.all[indexCur,], networkType_details, colors_vec, range)
1116
-        
1117
-        
1118
-        xlabel = paste0("Correlation raw\np-value (binned)")
1119
-        
1120
-        
1121
-        xlabels = levels(tbl.l$d_merged$peak_gene.p.raw.class)
1122
-        xlabels[setdiff(seq_len(length(xlabels)), c(1, floor(length(xlabels)/2), length(xlabels)))] <- ""
1123
-        
1124
-        gB3 = ggplot(tbl.l$d_merged, aes(peak_gene.p.raw.class, ratio, fill = classAll)) + 
1125
-          geom_bar(stat = "identity", position="dodge", na.rm = TRUE, width = 0.5) + 
1126
-          geom_hline(yintercept = 1, linetype = "dotted") + 
1127
-          xlab(xlabel) + ylab("Ratio") +
1128
-          scale_fill_manual("Class", values = c(dist_class, r_pos_class), 
1129
-                            labels = c("real", "permuted", "r+ (r>0)", "r- (r<=0)"), 
1130
-          ) + # labels vector can be kind of manually specified here because the levels were previosly defined in a certain order
1131
-          scale_x_discrete(labels = xlabels_peakGene_praw.class) +
1132
-          theme_bw() +  
1133
-          #theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8), strip.background = element_blank(), strip.placement = "outside", axis.title.y = element_blank()) +
1134
-          # theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8) , axis.title.y = element_blank()) +
1135
-          theme_main +
1136
-          facet_wrap(~ factor(set), nrow = 2, scales = "free_y", strip.position = "left") 
1137
-        
1138
-        #  plot two FDR plots as well: (fraction of negative / negative+positive) and (fraction of permuted / permuted + real)
1139
-        # that could give an indication of whether an FDR based on the permuted or based on the negative correlations would be more stringent
1140
-        
1141
-        # R PEAK GENE #
1142
-        
1143
-        xlabel = paste0("Correlation coefficient r")
1144
-        
1145
-        sum_real = table(peakGeneCorrelations.all[indexCur,]$class)[names(dist_class)[1]]
1146
-        sum_rnd  = table(peakGeneCorrelations.all[indexCur,]$class)[names(dist_class)[2]]
1147
-        binData.r = peakGeneCorrelations.all[indexCur,] %>%
1148
-          dplyr::group_by(class) %>%
1149
-          dplyr::count(peak_gene.r.class) %>%
1150
-          dplyr::mutate(nnorm = dplyr::case_when(class == !! (names(dist_class)[1]) ~ .data$n / (sum_real / sum_rnd), 
1151
-                                                 TRUE ~ as.double(.data$n)))
1152
-        
1153
-        xlabel = paste0("Correlation coefficient r (binned)")
1154
-        
1155
-        gD = ggplot(binData.r, aes(peak_gene.r.class, nnorm, group = class, fill = class)) + 
1156
-          geom_bar(stat = "identity", position = position_dodge(preserve = "single"), na.rm = FALSE, width = 0.5) +
1157
-          geom_line(aes(peak_gene.r.class, nnorm, group = class, color= class), stat = "identity") +
1158
-          scale_fill_manual("Group", labels = names(dist_class), values = dist_class) +
1159
-          scale_color_manual("Group", labels = names(dist_class), values = dist_class) +
1160
-          scale_x_discrete(labels = xlabels_peakGene_r.class2, drop = FALSE) +
1161
-          theme_bw() + theme(legend.position = "none") +
1162
-          xlab(xlabel) + ylab("Abundance") +
1163
-          theme_main  +
1164
-          scale_y_continuous(labels = scales::scientific)
1165
-        
1166
-        
1167
-        mainTitle = paste0("Summary QC (TF: ", TFCur, ", gene type: ", paste0(geneTypesSelected, collapse = "+"), ",\n", .prettyNum(range), " bp promoter range)")
1168
-        
1169
-        plots_all = ( ((gA2 | gB3 ) + 
1170
-                         patchwork::plot_layout(widths = c(2.5,1.5))) / ((gD) + 
1171
-                                                                           patchwork::plot_layout(widths = c(4))) ) + 
1172
-          patchwork::plot_layout(heights = c(2,1), guides = 'collect') +
1173
-          patchwork::plot_annotation(title = mainTitle, theme = theme(plot.title = element_text(hjust = 0.5)))
1174
-        
1175
-        plot(plots_all)
1176
-        
1225
+        pageCounter = pageCounter + 1
1226
+
1177 1227
         
1178 1228
       } # end for each TF
1179 1229
       
... ...
@@ -1183,9 +1233,18 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1183 1233
         #plot(ChIPseeker::plotAnnoBar(GRN@annotation$consensusPeaks_obj))
1184 1234
         
1185 1235
         # no plot, as this is somehow just a list and no ggplot object
1186
-        ChIPseeker::plotAnnoPie(GRN@annotation$consensusPeaks_obj, 
1187
-                                main = paste0("\nPeak annotation BEFORE filtering (n = ", nrow(GRN@annotation$consensusPeaks), ")"))
1188
-        plot(ChIPseeker::plotDistToTSS(GRN@annotation$consensusPeaks_obj))
1236
+        if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1237
+            
1238
+            ChIPseeker::plotAnnoPie(GRN@annotation$consensusPeaks_obj, 
1239
+                                    main = paste0("\nPeak annotation BEFORE filtering (n = ", nrow(GRN@annotation$consensusPeaks), ")"))
1240
+        }
1241
+        pageCounter = pageCounter + 1
1242
+          
1243
+        if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1244
+            plot(ChIPseeker::plotDistToTSS(GRN@annotation$consensusPeaks_obj))
1245
+        }
1246
+        
1247
+        pageCounter = pageCounter + 1
1189 1248
       }
1190 1249
       
1191 1250
       # PEAK AND GENE PROPERTIES #
... ...
@@ -1288,101 +1347,99 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1288 1347
         r_positive_label = c("TRUE"   = paste0("r+(r>0, n=", .prettyNum(r_pos_tbl[["TRUE"]]), ")"), 
1289 1348
                              "FALSE" = paste0("r-(r<=0, n=" ,.prettyNum(r_pos_tbl[["FALSE"]]), ")"))
1290 1349
         
1291
-        # Class in the facet_wrap has been removed, as this is only for real data here
1292
-        gA3 = ggplot(dataCur, aes(peak_gene.p_raw, color = .data[[newColName]])) + geom_density(size = 0.2)  +
1293
-          facet_wrap(~r_positive, labeller = labeller(r_positive = r_positive_label), nrow = 2) +
1294
-          geom_density(aes(color = classNew), color = "black",  linetype = "dotted", alpha = 1) + 
1295
-          xlab(xlabel) + ylab("Density (for real data only)") +  theme_bw() +
1296
-          scale_color_manual(newColName, values = mycolors, labels = var.label, drop = FALSE ) +
1297
-          theme(axis.text=element_text(size=rel(0.6)), strip.text.x = element_text(size = rel(0.8)), 
1298
-                legend.text=element_text(size=rel(0.6)), legend.position = "none")
1299
-        
1300
-        # The following causes an error with patchwork
1301
-        # gA3 = ggplot(dataCur, aes(peak_gene.p_raw, color = .data[[newColName]])) + geom_density(size = 0.2)  +
1302
-        #   facet_wrap(~ class + r_positive, labeller = labeller(class=freq_class, r_positive = r_positive_label), nrow = 2) +
1303
-        #   geom_density(aes(color = classNew), color = "black",  linetype = "dotted", alpha = 1) + 
1304
-        #   xlab(xlabel) + ylab("Density") +  theme_bw() +
1305
-        #   scale_color_manual(newColName, values = mycolors, labels = var.label, drop = FALSE ) +
1306
-        #   theme(axis.text=element_text(size=rel(0.6)), strip.text.x = element_text(size = rel(0.8)), 
1307
-        #         legend.text=element_text(size=rel(0.6)), legend.position = "none")
1308
-        
1309
-        
1310
-        # Ratios for r+ / r-
1311
-        freq =  dataCur %>%
1312
-          dplyr::group_by(class, .data[[newColName]], peak_gene.p.raw.class, r_positive) %>%
1313
-          dplyr::summarise(n = dplyr::n()) %>%
1314
-          dplyr::ungroup() %>%
1315
-          tidyr::complete(class, .data[[newColName]], peak_gene.p.raw.class, r_positive, fill = list(n = 0)) %>% # SOme cases might be missing
1316
-          dplyr::group_by(class, .data[[newColName]], peak_gene.p.raw.class) %>% # dont group by r_positive because we want to calculate the ratio within each group
1317
-          dplyr::mutate(  
1318
-            n = .data$n + 1, # to allow ratios to be computed even for 0 counts
1319
-            ratio_pos_raw = .data$n[r_positive] / .data$n[!r_positive]) %>%
1320
-          dplyr::filter(r_positive, class == names(dist_class)[1])# Keep only one r_positive row per grouping as we operate via the ratio and this data is duplicated otherwise. Remove random data also because these have been filtered out before are only back due to the complete invokation.
1321
-        
1322
-        # Cap ratios > 10 at 10 to avoid visual artefacts
1323
-        freq$ratio_pos_raw[which(freq$ratio_pos_raw > 10)] = 10
1324
-        
1325
-        # Without proper colors for now, this will be added after the next plot
1326
-        gB3 = ggplot(freq, aes(peak_gene.p.raw.class, ratio_pos_raw, fill = .data[[newColName]])) + 
1327
-          geom_bar(stat = "identity", position="dodge", na.rm = TRUE, width = 0.8) + 
1328
-          geom_hline(yintercept = 1, linetype = "dotted") + 
1329
-          xlab(xlabel) + ylab("Ratio r+ / r- (capped at 10)") +
1330
-          scale_x_discrete(labels = xlabels_peakGene_praw.class) +
1331
-          scale_fill_manual (varCur, values = mycolors, labels = var.label, drop = FALSE )  +
1332
-          theme_bw() +  
1333
-          #theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8), strip.background = element_blank(), strip.placement = "outside", axis.title.y = element_blank()) +
1334
-          # theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8) , axis.title.y = element_blank()) +
1335
-          theme_main +
1336
-          facet_wrap(~ factor(class), nrow = 2, scales = "free_y", strip.position = "left", labeller = labeller(class=freq_class)) 
1337
-        
1338
-        
1339
-        plots_all = ( ((gA3 | gB3 ) + 
1340
-                         patchwork::plot_layout(widths = c(2.5,1.5))) ) + 
1341
-          patchwork::plot_layout(guides = 'collect') +
1342
-          patchwork::plot_annotation(title = mainTitle, theme = theme(plot.title = element_text(hjust = 0.5)))
1343
-        
1344
-        plot(plots_all)
1345
-        
1346
-        
1347
-        # VERSION JUDITH: simplified peak.gene distance + another variable as histogram, no pemuted data
1348
-        
1349
-        datasetName = ""
1350
-        if (!is.null(GRN@config$metadata$name)) {
1351
-          datasetName = GRN@config$metadata$name
1350
+        if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1351
+            
1352
+            
1353
+            # Class in the facet_wrap has been removed, as this is only for real data here
1354
+            gA3 = ggplot(dataCur, aes(peak_gene.p_raw, color = .data[[newColName]])) + geom_density(size = 0.2)  +
1355
+                facet_wrap(~r_positive, labeller = labeller(r_positive = r_positive_label), nrow = 2) +
1356
+                geom_density(aes(color = classNew), color = "black",  linetype = "dotted", alpha = 1) + 
1357
+                xlab(xlabel) + ylab("Density (for real data only)") +  theme_bw() +
1358
+                scale_color_manual(newColName, values = mycolors, labels = var.label, drop = FALSE ) +
1359
+                theme(axis.text=element_text(size=rel(0.6)), strip.text.x = element_text(size = rel(0.8)), 
1360
+                      legend.text=element_text(size=rel(0.6)), legend.position = "none")
1361
+            
1362
+            # Ratios for r+ / r-
1363
+            freq =  dataCur %>%
1364
+                dplyr::group_by(class, .data[[newColName]], peak_gene.p.raw.class, r_positive) %>%
1365
+                dplyr::summarise(n = dplyr::n()) %>%
1366
+                dplyr::ungroup() %>%
1367
+                tidyr::complete(class, .data[[newColName]], peak_gene.p.raw.class, r_positive, fill = list(n = 0)) %>% # SOme cases might be missing
1368
+                dplyr::group_by(class, .data[[newColName]], peak_gene.p.raw.class) %>% # dont group by r_positive because we want to calculate the ratio within each group
1369
+                dplyr::mutate(  
1370
+                    n = .data$n + 1, # to allow ratios to be computed even for 0 counts
1371
+                    ratio_pos_raw = .data$n[r_positive] / .data$n[!r_positive]) %>%
1372
+                dplyr::filter(r_positive, class == names(dist_class)[1])# Keep only one r_positive row per grouping as we operate via the ratio and this data is duplicated otherwise. Remove random data also because these have been filtered out before are only back due to the complete invokation.
1373
+            
1374
+            # Cap ratios > 10 at 10 to avoid visual artefacts
1375
+            freq$ratio_pos_raw[which(freq$ratio_pos_raw > 10)] = 10
1376
+            
1377
+            # Without proper colors for now, this will be added after the next plot
1378
+            gB3 = ggplot(freq, aes(peak_gene.p.raw.class, ratio_pos_raw, fill = .data[[newColName]])) + 
1379
+                geom_bar(stat = "identity", position="dodge", na.rm = TRUE, width = 0.8) + 
1380
+                geom_hline(yintercept = 1, linetype = "dotted") + 
1381
+                xlab(xlabel) + ylab("Ratio r+ / r- (capped at 10)") +
1382
+                scale_x_discrete(labels = xlabels_peakGene_praw.class) +
1383
+                scale_fill_manual (varCur, values = mycolors, labels = var.label, drop = FALSE )  +
1384
+                theme_bw() +  
1385
+                #theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8), strip.background = element_blank(), strip.placement = "outside", axis.title.y = element_blank()) +
1386
+                # theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8) , axis.title.y = element_blank()) +
1387
+                theme_main +
1388
+                facet_wrap(~ factor(class), nrow = 2, scales = "free_y", strip.position = "left", labeller = labeller(class=freq_class)) 
1389
+            
1390
+            
1391
+            plots_all = ( ((gA3 | gB3 ) + 
1392
+                               patchwork::plot_layout(widths = c(2.5,1.5))) ) + 
1393
+                patchwork::plot_layout(guides = 'collect') +
1394
+                patchwork::plot_annotation(title = mainTitle, theme = theme(plot.title = element_text(hjust = 0.5)))
1395
+            
1396
+            plot(plots_all)
1352 1397
         }
1353
-        
1354
-        mainTitle2 = paste0(datasetName, "\nSummary QC (TF: ", TFCur, ", gene type: ", paste0(geneTypesSelected, collapse = "+"), ", ",
1355
-                            .prettyNum(range), " bp promoter range, stratified by distance + ", varCur, ")")
1356
-        
1357
-        # r+ and r-
1358
-        binwidth = 0.1
1359
-        mycolors <- viridis::viridis(2)
1360
-        xlabel = paste0("Correlation raw p-value (", binwidth, " bins)")
1361
-        
1362
-        dataCur = dataCur %>%
1363
-          dplyr::mutate(peak_gene.distance.class250k = factor(dplyr::if_else(peak_gene.distance <= 250000, "<=250k", ">250k"))) %>%
1364
-          dplyr::select(-peak_gene.distance)
1365
-        
1366
-        # closed = "left", boundary = 0, ensure correct numbers. See https://github.com/tidyverse/ggplot2/issues/1739
1367
-        # Without boundary = 0, counts are actually wrong
1398
+        pageCounter = pageCounter + 1
1368 1399
         
1369 1400
         
1370
-        nrows_plot = 2
1371
-        if (length(unique(dataCur[[newColName]])) > 9) {
1372
-          nrows_plot = 3
1401
+        # VERSION JUDITH: simplified peak.gene distance + another variable as histogram, no permuted data
1402
+        if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1403
+            
1404
+            datasetName = ""
1405
+            if (!is.null(GRN@config$metadata$name)) {
1406
+                datasetName = GRN@config$metadata$name
1407
+            }
1408
+            
1409
+            mainTitle2 = paste0(datasetName, "\nSummary QC (TF: ", TFCur, ", gene type: ", paste0(geneTypesSelected, collapse = "+"), ", ",
1410
+                                .prettyNum(range), " bp promoter range, stratified by distance + ", varCur, ")")
1411
+            
1412
+            # r+ and r-
1413
+            binwidth = 0.1
1414
+            mycolors <- viridis::viridis(2)
1415
+            xlabel = paste0("Correlation raw p-value (", binwidth, " bins)")
1416
+            
1417
+            dataCur = dataCur %>%
1418
+                dplyr::mutate(peak_gene.distance.class250k = factor(dplyr::if_else(peak_gene.distance <= 250000, "<=250k", ">250k"))) %>%
1419
+                dplyr::select(-peak_gene.distance)
1420
+            
1421
+            # closed = "left", boundary = 0, ensure correct numbers. See https://github.com/tidyverse/ggplot2/issues/1739
1422
+            # Without boundary = 0, counts are actually wrong
1423
+            
1424
+            
1425
+            nrows_plot = 2
1426
+            if (length(unique(dataCur[[newColName]])) > 9) {
1427
+                nrows_plot = 3
1428
+            }
1429
+            
1430
+            gA5 = ggplot(dataCur, aes(peak_gene.p_raw, fill = r_positive)) + 
1431
+                geom_histogram(binwidth = binwidth, position="dodge", closed = "left", boundary = 0)  +
1432
+                facet_wrap(~ peak_gene.distance.class250k  + .data[[newColName]], nrow = nrows_plot, scales = "free_y") +
1433
+                xlab(xlabel) + ylab(paste0("Abundance for classes with n>=", nGroupsMin)) +  theme_bw() +
1434
+                ggtitle(mainTitle2) + 
1435
+                scale_fill_manual("Class for r", values = mycolors, labels = r_positive_label, drop = FALSE ) +
1436
+                theme(axis.text=element_text(size=rel(0.6)), strip.text.x = element_text(size = rel(0.6)), 
1437
+                      legend.text=element_text(size=rel(0.7)))
1438
+            
1439
+            
1440
+            plot(gA5)
1373 1441
         }
1374
-        
1375
-        gA5 = ggplot(dataCur, aes(peak_gene.p_raw, fill = r_positive)) + 
1376
-          geom_histogram(binwidth = binwidth, position="dodge", closed = "left", boundary = 0)  +
1377
-          facet_wrap(~ peak_gene.distance.class250k  + .data[[newColName]], nrow = nrows_plot, scales = "free_y") +
1378
-          xlab(xlabel) + ylab(paste0("Abundance for classes with n>=", nGroupsMin)) +  theme_bw() +
1379
-          ggtitle(mainTitle2) + 
1380
-          scale_fill_manual("Class for r", values = mycolors, labels = r_positive_label, drop = FALSE ) +
1381
-          theme(axis.text=element_text(size=rel(0.6)), strip.text.x = element_text(size = rel(0.6)), 
1382
-                legend.text=element_text(size=rel(0.7)))
1383
-        
1384
-        
1385
-        plot(gA5)
1442
+        pageCounter = pageCounter + 1
1386 1443
         
1387 1444
         
1388 1445
         
... ...
@@ -1401,21 +1458,29 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1401 1458
       indexFilt = intersect(indexFilt, indexCur)
1402 1459
       
1403 1460
       if (length(indexFilt) > 0) {
1404
-        g = ggplot(peakGeneCorrelations.all[indexFilt,], aes(peak_gene.p_raw, color = peak_gene.distance_class_abs)) + geom_density() + 
1405
-          ggtitle(paste0("Density of the raw p-value distributions")) + 
1406
-          facet_wrap(~ r_positive, ncol = 2, labeller = labeler_r_pos) + 
1407
-          scale_color_viridis_d(labels = .classFreq_label(table(peakGeneCorrelations.all[indexFilt,]$peak_gene.distance_class_abs))) +
1408
-          theme_bw()
1409
-        plot(g)
1410
-        
1461
+          
1462
+          if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1463
+              g = ggplot(peakGeneCorrelations.all[indexFilt,], aes(peak_gene.p_raw, color = peak_gene.distance_class_abs)) + geom_density() + 
1464
+                  ggtitle(paste0("Density of the raw p-value distributions")) + 
1465
+                  facet_wrap(~ r_positive, ncol = 2, labeller = labeler_r_pos) + 
1466
+                  scale_color_viridis_d(labels = .classFreq_label(table(peakGeneCorrelations.all[indexFilt,]$peak_gene.distance_class_abs))) +
1467
+                  theme_bw()
1468
+              plot(g)
1469
+          }
1470
+        pageCounter = pageCounter + 1
1411 1471
         
1412 1472
         if (includeRobustCols) {
1413
-          g = ggplot(peakGeneCorrelations.all[indexFilt,], aes(peak_gene.p_raw.robust, color = peak_gene.distance_class_abs)) + geom_density() + 
1414
-            ggtitle(paste0("Density of the raw p-value distributions\n(stratified by whether r is positive)")) + 
1415
-            facet_wrap(~ r_positive, ncol = 2, labeller = labeler_r_pos) + 
1416
-            scale_color_viridis_d(labels = .classFreq_label(table(peakGeneCorrelations.all[indexFilt,]$peak_gene.distance_class_abs))) +
1417
-            theme_bw()
1418
-          plot(g)
1473
+            
1474
+            if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1475
+                
1476
+                g = ggplot(peakGeneCorrelations.all[indexFilt,], aes(peak_gene.p_raw.robust, color = peak_gene.distance_class_abs)) + geom_density() + 
1477
+                    ggtitle(paste0("Density of the raw p-value distributions\n(stratified by whether r is positive)")) + 
1478
+                    facet_wrap(~ r_positive, ncol = 2, labeller = labeler_r_pos) + 
1479
+                    scale_color_viridis_d(labels = .classFreq_label(table(peakGeneCorrelations.all[indexFilt,]$peak_gene.distance_class_abs))) +
1480
+                    theme_bw()
1481
+                plot(g)
1482
+            }
1483
+          pageCounter = pageCounter + 1
1419 1484
           
1420 1485
           
1421 1486
         }
... ...
@@ -1426,13 +1491,17 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1426 1491
       
1427 1492
       
1428 1493
       if (length(indexFilt) > 0) {
1429
-        g = ggplot(peakGeneCorrelations.all[indexFilt,], aes(peak_gene.p_raw, color = r_positive)) + 
1430
-          geom_density() + 
1431
-          ggtitle(paste0("Density of the raw p-value distributions")) + 
1432
-          facet_wrap(~ peak_gene.distance_class_abs,  ncol = 2, labeller = .customLabeler(table(peakGeneCorrelations.all$peak_gene.distance_class_abs))) +
1433
-          scale_color_manual(labels = .classFreq_label(table(peakGeneCorrelations.all[indexFilt,]$r_positive)), values = r_pos_class) +
1434
-          theme_bw()
1435
-        plot(g)
1494
+          
1495
+        if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1496
+            g = ggplot(peakGeneCorrelations.all[indexFilt,], aes(peak_gene.p_raw, color = r_positive)) + 
1497
+                geom_density() + 
1498
+                ggtitle(paste0("Density of the raw p-value distributions")) + 
1499
+                facet_wrap(~ peak_gene.distance_class_abs,  ncol = 2, labeller = .customLabeler(table(peakGeneCorrelations.all$peak_gene.distance_class_abs))) +
1500
+                scale_color_manual(labels = .classFreq_label(table(peakGeneCorrelations.all[indexFilt,]$r_positive)), values = r_pos_class) +
1501
+                theme_bw()
1502
+            plot(g)
1503
+        }
1504
+        pageCounter = pageCounter + 1
1436 1505
         
1437 1506
         
1438 1507
       }
... ...
@@ -1441,15 +1510,18 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1441 1510
       #
1442 1511
       # Focus on peak_gene.r
1443 1512
       #
1513
+      if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1514
+          
1515
+          g = ggplot(peakGeneCorrelations.all[indexCur,], aes(peak_gene.r, color = peak_gene.distance_class_abs)) + geom_density() + 
1516
+              geom_density(data = peakGeneCorrelations.all[indexCur,], aes(peak_gene.r), color = "black") + 
1517
+              ggtitle(paste0("Density of the correlation coefficients")) + 
1518
+              scale_color_viridis_d(labels = .classFreq_label(table(peakGeneCorrelations.all[indexCur,]$peak_gene.distance_class_abs))) +
1519
+              theme_bw()
1520
+          plot(g)
1521
+      }
1522
+      pageCounter = pageCounter + 1
1444 1523
       
1445
-      g = ggplot(peakGeneCorrelations.all[indexCur,], aes(peak_gene.r, color = peak_gene.distance_class_abs)) + geom_density() + 
1446
-        geom_density(data = peakGeneCorrelations.all[indexCur,], aes(peak_gene.r), color = "black") + 
1447
-        ggtitle(paste0("Density of the correlation coefficients")) + 
1448
-        scale_color_viridis_d(labels = .classFreq_label(table(peakGeneCorrelations.all[indexCur,]$peak_gene.distance_class_abs))) +
1449
-        theme_bw()
1450
-      plot(g)
1451
-      
1452
-      
1524
+      .printWarningPageNumber(pages, pageCounter)
1453 1525
       if (!is.null(fileBase)) {
1454 1526
         grDevices::dev.off()
1455 1527
       }
... ...
@@ -1468,108 +1540,6 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1468 1540
   
1469 1541
 }
1470 1542
 
1471
-.createTables_peakGeneQC <- function(peakGeneCorrelations.all.cur, networkType_details, colors_vec, range) {
1472
-  
1473
-  d = peakGeneCorrelations.all.cur %>% 
1474
-    dplyr::group_by(r_positive, class, peak_gene.p.raw.class) %>%
1475
-    dplyr::summarise(n = dplyr::n()) %>%
1476
-    dplyr::ungroup()
1477
-  
1478
-  # Some classes might be missing, add them here with explicit zeros
1479
-  for (r_pos in c(TRUE, FALSE)) {
1480
-    for (classCur in networkType_details){
1481
-      for (pclassCur in levels(peakGeneCorrelations.all.cur$peak_gene.p.raw.class)) {
1482
-        
1483
-        row = which(d$r_positive == r_pos & d$class == classCur & as.character(d$peak_gene.p.raw.class) == as.character(pclassCur))
1484
-        if (length(row) == 0) {
1485
-          d = tibble::add_row(d, r_positive = r_pos, class = classCur, peak_gene.p.raw.class = pclassCur, n = 0)
1486
-        }
1487
-      }
1488
-    }
1489
-  }
1490
-  
1491
-  # Restore the "ordered" factor for class
1492
-  d$peak_gene.p.raw.class = factor(d$peak_gene.p.raw.class, ordered = TRUE, levels =  levels(peakGeneCorrelations.all.cur$peak_gene.p.raw.class))
1493
-  
1494
-  
1495
-  # Normalization factors
1496
-  dsum = d %>%
1497
-    dplyr::group_by(r_positive, class) %>%
1498
-    dplyr::summarise(sum_n = sum(.data$n))
1499
-  
1500
-  
1501
-  # Summarize per bin
1502
-  d2 = d %>%
1503
-    dplyr::group_by(class, peak_gene.p.raw.class)%>%
1504
-    dplyr::summarise(sum_pos = .data$n[r_positive],
1505
-                     sum_neg = .data$n[!r_positive]) %>%
1506
-    dplyr::mutate(ratio_pos_raw = sum_pos / sum_neg,
1507
-                  enrichment_pos = sum_pos / (sum_pos + sum_neg),
1508
-                  ratio_neg = 1 - enrichment_pos) %>%
1509
-    dplyr::ungroup()
1510
-  
1511
-  # Compare between real and permuted
1512
-  normFactor_real = dplyr::filter(dsum, class ==  !! (networkType_details[1])) %>%  dplyr::pull(sum_n) %>% sum() /
1513
-    dplyr::filter(dsum, class ==  !! (networkType_details[2])) %>%  dplyr::pull(sum_n) %>% sum()
1514
-  
1515
-  # ratio_norm not used currently, no normalization necessary here or not even useful because we dont want to normalize the r_pos and r_neg ratios: These are signal in a way. Only when comparing between real and permuted, we have to account for sample size for corrections
1516
-  d3 = d %>%
1517
-    dplyr::group_by(peak_gene.p.raw.class, r_positive) %>%
1518
-    dplyr::summarise(n_real     = .data$n[class == !! (names(colors_vec)[1]) ],
1519
-                     n_permuted = .data$n[class == !! (names(colors_vec)[2]) ]) %>%
1520
-    dplyr::ungroup() %>%
1521
-    dplyr::mutate(ratio_real_raw = n_real / n_permuted,
1522
-                  ratio_real_norm = ratio_real_raw / normFactor_real,
1523
-                  enrichment_real      = n_real / (n_real + n_permuted),
1524
-                  enrichment_real_norm = (n_real / normFactor_real) / ((n_real / normFactor_real) + n_permuted)) 
1525
-  
1526
-  
1527
-  stopifnot(identical(levels(d2$peak_gene.p.raw.class), levels(d3$peak_gene.p.raw.class)))
1528
-  # 2 enrichment bar plots but combined using facet_wrap
1529
-  d2$set = "r+ / r-"; d3$set = "real / permuted" 
1530
-  d_merged <- tibble::tibble(peak_gene.p.raw.class = c(as.character(d2$peak_gene.p.raw.class), 
1531
-                                                       as.character(d3$peak_gene.p.raw.class)),
1532
-                             ratio = c(d2$ratio_pos_raw, d3$ratio_real_norm),
1533
-                             classAll = c(as.character(d2$class), d3$r_positive),
1534
-                             set = c(d2$set, d3$set)) %>%
1535
-    dplyr::mutate(classAll = factor(classAll, levels=c(paste0("real_",range), paste0("random_",range), "TRUE", "FALSE")),
1536
-                  peak_gene.p.raw.class = factor(peak_gene.p.raw.class, levels = levels(d2$peak_gene.p.raw.class)))
1537
-  
1538
-  d4 = tibble::tibble(peak_gene.p.raw.class = unique(d$peak_gene.p.raw.class), 
1539
-                      n_rpos_real = NA_integer_, n_rpos_random = NA_integer_,
1540
-                      n_rneg_real = NA_integer_, n_rneg_random = NA_integer_,
1541
-                      ratio_permuted_real_rpos_norm = NA_real_,
1542
-                      ratio_permuted_real_rneg_norm = NA_real_)
1543
-  
1544
-  for (i in seq_len(nrow(d4))) {
1545
-    row_d2 = which(d2$class == networkType_details[1] & d2$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
1546
-    stopifnot(length(row_d2) == 1)
1547
-    d4[i, "n_rpos_real"] = d2[row_d2, "sum_pos"] %>% unlist()
1548
-    d4[i, "n_rneg_real"] = d2[row_d2, "sum_neg"] %>% unlist()
1549
-    row_d2 = which(d2$class == paste0("random_",range) & d2$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
1550
-    d4[i, "n_rpos_random"] = d2[row_d2, "sum_pos"] %>% unlist()
1551
-    d4[i, "n_rneg_random"] = d2[row_d2, "sum_neg"] %>% unlist()
1552
-    
1553
-    row_d3 = which(d3$r_positive == TRUE & d3$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
1554
-    d4[i, "ratio_permuted_real_rpos_norm"] = 1- d3[row_d3, "ratio_real_norm"] %>% unlist()
1555
-    row_d3 = which(d3$r_positive == FALSE & d3$peak_gene.p.raw.class == d4$peak_gene.p.raw.class[i])
1556
-    d4[i, "ratio_permuted_real_rneg_norm"] = 1- d3[row_d3, "ratio_real_norm"] %>% unlist()
1557
-  }
1558
-  
1559
-  d4 = d4 %>%
1560
-    dplyr::mutate(ratio_rneg_rpos_real = n_rneg_real / (n_rneg_real + n_rpos_real),
1561
-                  ratio_rneg_rpos_random = n_rneg_random / (n_rneg_random + n_rpos_random),
1562
-                  peak_gene.p.raw.class.bin = as.numeric(peak_gene.p.raw.class)) %>%
1563
-    dplyr::arrange(peak_gene.p.raw.class.bin)
1564
-  
1565
-  d4_melt = reshape2::melt(d4, id  = c("peak_gene.p.raw.class.bin", "peak_gene.p.raw.class")) %>%
1566
-    dplyr::filter(grepl("ratio", variable))
1567
-  
1568
-  
1569
-  list(d = d, d2 = d2, d3 = d3, d4 = d4, d4_melt = d4_melt, d_merged = d_merged)
1570
-  
1571
-}
1572
-
1573 1543
 
1574 1544
 
1575 1545
 .customLabeler <- function(tbl_freq) {
... ...
@@ -1578,9 +1548,7 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1578 1548
   ggplot2::as_labeller(tbl_freq_label)
1579 1549
 }
1580 1550
 
1581
-.classFreq_label <- function(tbl_freq) {
1582
-  paste0(names(tbl_freq), " (", tbl_freq, ")")
1583
-}
1551
+
1584 1552
 
1585 1553
 
1586 1554
 ###### Connection summaries ########
... ...
@@ -1594,6 +1562,7 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1594 1562
 #' @template plotAsPDF
1595 1563
 #' @template pdf_width
1596 1564
 #' @template pdf_height
1565
+#' @template pages
1597 1566
 #' @template forceRerun
1598 1567
 #' @return The same \code{\linkS4class{GRN}} object, without modifications. In addition, for the specified \code{type}, a PDF file (default filename is \code{GRN.connectionSummary_{type}.pdf}) is produced with a connection summary.
1599 1568
 #' @examples 
... ...
@@ -1604,7 +1573,7 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1604 1573
 #' @importFrom circlize colorRamp2
1605 1574
 plot_stats_connectionSummary <- function(GRN, type = "heatmap", 
1606 1575
                                          outputFolder = NULL, basenameOutput = NULL, 
1607
-                                         plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, 
1576
+                                         plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, pages = NULL,
1608 1577
                                          forceRerun = FALSE) {
1609 1578
   
1610 1579
   GRN = .addFunctionLogToObject(GRN)
... ...
@@ -1614,6 +1583,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1614 1583
   checkmate::assertFlag(plotAsPDF)
1615 1584
   checkmate::assertNumeric(pdf_width, lower = 5, upper = 99)
1616 1585
   checkmate::assertNumeric(pdf_height, lower = 5, upper = 99)
1586
+  checkmate::assert(checkmate::checkNull(pages), checkmate::checkIntegerish(pages, lower = 1))
1617 1587
   checkmate::assertFlag(forceRerun)
1618 1588
   checkmate::assert(checkmate::checkNull(outputFolder), checkmate::checkCharacter(outputFolder, min.chars = 1))
1619 1589
   checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE))
... ...
@@ -1627,7 +1597,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1627 1597
       file = NULL
1628 1598
     }
1629 1599
     
1630
-    .plot_stats_connectionSummaryHeatmap(GRN, file = file, pdf_width = pdf_width, pdf_height = pdf_height, forceRerun = forceRerun) 
1600
+    .plot_stats_connectionSummaryHeatmap(GRN, file = file, pdf_width = pdf_width, pdf_height = pdf_height, pages = pages, forceRerun = forceRerun) 
1631 1601
   
1632 1602
   } else if (type ==  "boxplot") {
1633 1603
     
... ...
@@ -1637,7 +1607,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1637 1607
       file = NULL
1638 1608
     }
1639 1609
     
1640
-    .plot_stats_connectionSummaryBoxplot(GRN, file = file, pdf_width = pdf_width, pdf_height = pdf_height, forceRerun = forceRerun) 
1610
+    .plot_stats_connectionSummaryBoxplot(GRN, file = file, pdf_width = pdf_width, pdf_height = pdf_height, pages = pages, forceRerun = forceRerun) 
1641 1611
     
1642 1612
   } else if (type ==  "density") {
1643 1613
     
... ...
@@ -1647,7 +1617,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1647 1617
     } else {
1648 1618
       file = NULL
1649 1619
     }
1650
-    .plot_stats_connectionSummaryDensity(GRN, file = file, pdf_width = pdf_width, pdf_height = pdf_height, forceRerun = forceRerun) 
1620
+    .plot_stats_connectionSummaryDensity(GRN, file = file, pdf_width = pdf_width, pdf_height = pdf_height, pages = pages, forceRerun = forceRerun) 
1651 1621
   }
1652 1622
   
1653 1623
   GRN
... ...
@@ -1655,13 +1625,13 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1655 1625
 }
1656 1626
 
1657 1627
 
1658
-.plot_stats_connectionSummaryHeatmap <- function(GRN, file = NULL, pdf_width = 12, pdf_height = 12, forceRerun = FALSE) {
1628
+.plot_stats_connectionSummaryHeatmap <- function(GRN, file = NULL, pdf_width = 12, pdf_height = 12, pages = NULL, forceRerun = FALSE) {
1659 1629
   
1660 1630
   start = Sys.time()
1661 1631
   
1662 1632
   futile.logger::flog.info(paste0("Plotting connection summary", dplyr::if_else(is.null(file), "", paste0(" to file ", file))))
1663 1633
   
1664
-  if ((!is.null(file) && !file.exists(file))| forceRerun) {
1634
+  if ((!is.null(file) && !file.exists(file)) | is.null(file) | forceRerun) {
1665 1635
     
1666 1636
     if (nrow(GRN@stats$connections) == 0) {
1667 1637
       message = paste0("Statistics summary missing from object, please run the function generateStatsSummary first")
... ...
@@ -1676,7 +1646,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1676 1646
       grDevices::pdf(file = file, width = pdf_width, height = pdf_height)
1677 1647
     }
1678 1648
     
1679
-    
1649
+    pageCounter = 1 
1680 1650
     for (allowMissingTFsCur in unique(GRN@stats$connections$allowMissingTFs)) {
1681 1651
       
1682 1652
       for (allowMissingGenesCur in unique(GRN@stats$connections$allowMissingGenes)) {
... ...
@@ -1723,30 +1693,31 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1723 1693
               permIndex = as.character(permCur)
1724 1694
               permSuffix = paste0(dplyr::if_else(permCur == 0, " (real)", " (permuted)"))
1725 1695
               
1726
-              titlePlot =  paste0("Number of unique ", gsub("^n", "", elemCur), permSuffix,
1727
-                                  "\nallowMissingTFs: ", allowMissingTFsCur, 
1728
-                                  ", allowMissingGenes: ", allowMissingGenesCur, 
1729
-                                  ",\n TF_peak.connectionType: ", TF_peak.connectionTypeCur)
1730
-              
1731
-              
1732
-              colors = circlize::colorRamp2(c(0, maxDataRange), c("white", "red"))
1733
-              
1734
-              ComplexHeatmap::Heatmap(
1735
-                plotData.l[[permIndex]],
1736
-                name = "Number of\nconnections",
1737
-                col = colors,
1738
-                cluster_columns = FALSE, cluster_rows = FALSE,
1739
-                row_names_side = "right", row_names_gp = grid::gpar(fontsize = 10), 
1740
-                column_title = titlePlot,
1741
-                column_names_gp = grid::gpar(fontsize = 10),
1742
-                row_title = "TF-peak FDR",
1743
-                cell_fun = function(j, i, x, y, width, height, fill) {
1744
-                  grid::grid.text(sprintf("%.0f", plotData.l[[permIndex]][i, j]), x, y, gp = grid::gpar(fontsize = 10))
1745
-                }
1746
-              ) %>% plot()
1747
-              
1748
-              
1749
-              
1696
+              if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1697
+                  
1698
+                  titlePlot =  paste0("Number of unique ", gsub("^n", "", elemCur), permSuffix,
1699
+                                      "\nallowMissingTFs: ", allowMissingTFsCur, 
1700
+                                      ", allowMissingGenes: ", allowMissingGenesCur, 
1701
+                                      ",\n TF_peak.connectionType: ", TF_peak.connectionTypeCur)
1702
+                  
1703
+                  colors = circlize::colorRamp2(c(0, maxDataRange), c("white", "red"))
1704
+                  
1705
+                  ComplexHeatmap::Heatmap(
1706
+                      plotData.l[[permIndex]],
1707
+                      name = "Number of\nconnections",
1708
+                      col = colors,
1709
+                      cluster_columns = FALSE, cluster_rows = FALSE,
1710
+                      row_names_side = "right", row_names_gp = grid::gpar(fontsize = 10), 
1711
+                      column_title = titlePlot,
1712
+                      column_names_gp = grid::gpar(fontsize = 10),
1713
+                      row_title = "TF-peak FDR",
1714
+                      cell_fun = function(j, i, x, y, width, height, fill) {
1715
+                          grid::grid.text(sprintf("%.0f", plotData.l[[permIndex]][i, j]), x, y, gp = grid::gpar(fontsize = 30))
1716
+                      }
1717
+                  ) %>% plot()
1718
+                  
1719
+              }
1720
+              pageCounter = pageCounter + 1
1750 1721
               
1751 1722
             }
1752 1723
             
... ...
@@ -1757,6 +1728,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1757 1728
       }
1758 1729
     }
1759 1730
     
1731
+    .printWarningPageNumber(pages, pageCounter)
1760 1732
     if (!is.null(file)) {
1761 1733
       grDevices::dev.off()
1762 1734
     }
... ...
@@ -1768,7 +1740,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1768 1740
   
1769 1741
 }
1770 1742
 
1771
-.plot_stats_connectionSummaryDensity <- function(GRN, file = file, TF_peak.connectionType, allowMissingTFs = FALSE, allowMissingGenes = FALSE, pdf_width = 12, pdf_height = 12, forceRerun = FALSE) {
1743
+.plot_stats_connectionSummaryDensity <- function(GRN, file = file, TF_peak.connectionType, allowMissingTFs = FALSE, allowMissingGenes = FALSE, pdf_width = 12, pdf_height = 12, pages = NULL, forceRerun = FALSE) {
1772 1744
   
1773 1745
   # TODO
1774 1746
   # Same for both permutations, in one plot
... ...
@@ -1788,11 +1760,11 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1788 1760
 
1789 1761
 
1790 1762
 # Compare real and permuted one, replicate Darias plots
1791
-.plot_stats_connectionSummaryBoxplot <- function(GRN, file = NULL, pdf_width = 12, pdf_height = 12, forceRerun = FALSE) {
1763
+.plot_stats_connectionSummaryBoxplot <- function(GRN, file = NULL, pdf_width = 12, pdf_height = 12, pages = NULL, forceRerun = FALSE) {
1792 1764
   
1793 1765
   start = Sys.time()
1794 1766
   
1795
-  if ((!is.null(file) && !file.exists(file))| forceRerun) {
1767
+  if ((!is.null(file) && !file.exists(file))| is.null(file) | forceRerun) {
1796 1768
     
1797 1769
     
1798 1770
     futile.logger::flog.info(paste0("Plotting diagnostic plots for network connections", dplyr::if_else(is.null(file), "", paste0(" to file ", file))))
... ...
@@ -1816,7 +1788,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1816 1788
     pb <- progress::progress_bar$new(total = nElemsTotal)
1817 1789
     
1818 1790
     
1819
-    
1791
+    pageCounter = 1 
1820 1792
     for (TF_peak.fdr_cur in rev(TF_peak.fdrs)) {
1821 1793
       
1822 1794
       for (peak_gene.fdr_cur in peak_gene.fdrs) {
... ...
@@ -1828,64 +1800,70 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1828 1800
             for (TF_peak.connectionTypeCur in .getAll_TF_peak_connectionTypes(GRN)) {
1829 1801
               
1830 1802
               pb$tick()
1831
-              elemCur = "TF"
1832
-              dataCur0.df = as.data.frame(stats_details.l[["0"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1833
-              dataCur1.df = as.data.frame(stats_details.l[["1"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1834
-              
1835
-              if (nrow(dataCur0.df) == 0) {
1836
-                dataCur0.df = data.frame(. = "MISSING", Freq = 0)
1837
-              }
1838
-              if (nrow(dataCur1.df) == 0) {
1839
-                dataCur1.df = data.frame(. = "MISSING", Freq = 0)
1803
+                
1804
+              if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) {
1805
+                    
1806
+                  elemCur = "TF"
1807
+                  dataCur0.df = as.data.frame(stats_details.l[["0"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1808
+                  dataCur1.df = as.data.frame(stats_details.l[["1"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1809
+                  
1810
+                  if (nrow(dataCur0.df) == 0) {
1811
+                      dataCur0.df = data.frame(. = "MISSING", Freq = 0)
1812
+                  }
1813
+                  if (nrow(dataCur1.df) == 0) {
1814
+                      dataCur1.df = data.frame(. = "MISSING", Freq = 0)
1815
+                  }
1816
+                  
1817
+                  dataCur.df = rbind(dplyr::mutate(dataCur0.df, networkType = "real", connectionType = elemCur), 
1818
+                                     dplyr::mutate(dataCur1.df, networkType = "permuted", connectionType = elemCur)) %>% 
1819
+                      tibble::as_tibble()
1820
+                  
1821
+                  for (elemCur in c("gene","peak.gene","peak.TF")) {
1822
+                      
1823
+                      dataCur0.df = as.data.frame(stats_details.l[["0"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1824
+                      dataCur1.df = as.data.frame(stats_details.l[["1"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1825
+                      
1826
+                      if (nrow(dataCur0.df) == 0) {
1827
+                          dataCur0.df = data.frame(. = "MISSING", Freq = 0)
1828
+                      }
1829
+                      if (nrow(dataCur1.df) == 0) {
1830
+                          dataCur1.df = data.frame(. = "MISSING", Freq = 0)
1831
+                      }
1832
+                      
1833
+                      dataCur.df = rbind(dataCur.df,
1834
+                                         dplyr::mutate(dataCur0.df, networkType = "real", connectionType = elemCur), 
1835
+                                         dplyr::mutate(dataCur1.df, networkType = "permuted", connectionType = elemCur))
1836
+                      
1837
+                  }
1838
+                  
1839
+                  dataCur.df$networkType =as.factor(dataCur.df$networkType)
1840
+                  
1841
+                  titlePlot =  paste0("TF_peak.fdr: ", TF_peak.fdr_cur, 
1842
+                                      ", peak_gene.fdr: ", peak_gene.fdr_cur, 
1843
+                                      "\nallowMissingTFs: ", allowMissingTFsCur, 
1844
+                                      ", allowMissingGenes: ", allowMissingGenesCur, 
1845
+                                      ",\n TF_peak.connectionType: ", TF_peak.connectionTypeCur)
1846
+                  
1847
+                  if (max(dataCur.df$Freq)> 0) {
1848
+                      
1849
+                      g = ggplot(dataCur.df, aes(networkType, log10(Freq), fill = networkType)) + geom_boxplot() +theme_bw() + 
1850
+                          xlab("Network type")  +ylab(paste0("log10(Number of connections per category",  ", empty = 0)")) +
1851
+                          ggtitle(titlePlot) + 
1852
+                          facet_wrap(~connectionType, scales = "free") + 
1853
+                          scale_fill_manual("Network type", values = c("real" =  "red", "permuted" = "gray"), 
1854
+                                            labels = c("real" =  "real", "permuted" = "permuted"), drop = FALSE)
1855
+                      
1856
+                      suppressWarnings(plot(g))
1857
+                      
1858
+                  } else {
1859
+                      
1860
+                      plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', axes=FALSE, main = titlePlot)
1861
+                      message = paste0(titlePlot, "\nNo data to show")
1862
+                      text(x = 0.5, y = 0.5, message, cex = 1.2, col = "red")
1863
+                  }
1840 1864
               }
1865
+              pageCounter = pageCounter + 1  
1841 1866
               
1842
-              dataCur.df = rbind(dplyr::mutate(dataCur0.df, networkType = "real", connectionType = elemCur), 
1843
-                                 dplyr::mutate(dataCur1.df, networkType = "permuted", connectionType = elemCur)) %>% 
1844
-                tibble::as_tibble()
1845
-              
1846
-              for (elemCur in c("gene","peak.gene","peak.TF")) {
1847
-                
1848
-                dataCur0.df = as.data.frame(stats_details.l[["0"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1849
-                dataCur1.df = as.data.frame(stats_details.l[["1"]][[TF_peak.fdr_cur]][[peak_gene.fdr_cur]] [[allowMissingTFsCur]] [[allowMissingGenesCur]] [[TF_peak.connectionTypeCur]] [[elemCur]])
1850
-                
1851
-                if (nrow(dataCur0.df) == 0) {
1852
-                  dataCur0.df = data.frame(. = "MISSING", Freq = 0)
1853
-                }
1854
-                if (nrow(dataCur1.df) == 0) {
1855
-                  dataCur1.df = data.frame(. = "MISSING", Freq = 0)
1856
-                }
1857
-                
1858
-                dataCur.df = rbind(dataCur.df,
1859
-                                   dplyr::mutate(dataCur0.df, networkType = "real", connectionType = elemCur), 
1860
-                                   dplyr::mutate(dataCur1.df, networkType = "permuted", connectionType = elemCur))
1861
-                
1862
-              }
1863
-              
1864
-              dataCur.df$networkType =as.factor(dataCur.df$networkType)
1865
-              
1866
-              titlePlot =  paste0("TF_peak.fdr: ", TF_peak.fdr_cur, 
1867
-                                  ", peak_gene.fdr: ", peak_gene.fdr_cur, 
1868
-                                  "\nallowMissingTFs: ", allowMissingTFsCur, 
1869
-                                  ", allowMissingGenes: ", allowMissingGenesCur, 
1870
-                                  ",\n TF_peak.connectionType: ", TF_peak.connectionTypeCur)
1871
-              
1872
-              if (max(dataCur.df$Freq)> 0) {
1873
-                
1874
-                g = ggplot(dataCur.df, aes(networkType, log10(Freq), fill = networkType)) + geom_boxplot() +theme_bw() + 
1875
-                  xlab("Network type")  +ylab(paste0("log10(Number of connections per category",  ", empty = 0)")) +
1876
-                  ggtitle(titlePlot) + 
1877
-                  facet_wrap(~connectionType, scales = "free") + 
1878
-                  scale_fill_manual("Network type", values = c("real" =  "red", "permuted" = "gray"), 
1879
-                                    labels = c("real" =  "real", "permuted" = "permuted"), drop = FALSE)
1880
-                
1881
-                suppressWarnings(plot(g))
1882
-                
1883
-              } else {
1884
-                
1885
-                plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', axes=FALSE, main = titlePlot)
1886
-                message = paste0(titlePlot, "\nNo data to show")
1887
-                text(x = 0.5, y = 0.5, message, cex = 1.2, col = "red")
1888
-              }
1889 1867
               
1890 1868
             } #end  for (TF_peak.connectionTypeCur in TF_peak.connectionTypesAll)
1891 1869
             
... ...
@@ -1897,6 +1875,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1897 1875
       
1898 1876
     } # end for TF_peak.fdr_cur
1899 1877
     
1878
+    .printWarningPageNumber(pages, pageCounter)
1900 1879
     if (!is.null(file)) {
1901 1880
       grDevices::dev.off()
1902 1881
     }
... ...
@@ -1908,9 +1887,6 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1908 1887
 }
1909 1888
 
1910 1889
 
1911
-
1912
-
1913
-
1914 1890
 ######## General & communities Stats Functions ########
1915 1891
 
1916 1892
 
... ...
@@ -1924,6 +1900,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1924 1900
 #' @template plotAsPDF
1925 1901
 #' @template pdf_width
1926 1902
 #' @template pdf_height
1903
+#' @template pages
1927 1904
 #' @return The same \code{\linkS4class{GRN}} object with no changes. The results are output to a file.
1928 1905
 #' @seealso \code{\link{plotGeneralEnrichment}}
1929 1906
 #' @seealso \code{\link{plotCommunitiesStats}}
... ...
@@ -1934,7 +1911,7 @@ plot_stats_connectionSummary <- function(GRN, type = "heatmap",
1934 1911
 #' GRN = plotGeneralGraphStats(GRN, plotAsPDF = FALSE)
1935 1912
 #' @export
1936 1913
 plotGeneralGraphStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL, 
1937
-                                  plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, 
1914
+                                  plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, pages = NULL,
1938 1915
                                   forceRerun = FALSE) {
1939 1916
   
1940 1917
   start = Sys.time()
... ...
@@ -1944,6 +1921,7 @@ plotGeneralGraphStats <- function(GRN, outputFolder = NULL, basenameOutput = NUL
1944 1921
   checkmate::assertFlag(plotAsPDF)
1945 1922
   checkmate::assertNumeric(pdf_width, lower = 5, upper = 99)
1946 1923
   checkmate::assertNumeric(pdf_height, lower = 5, upper = 99)
1924
+  checkmate::assert(checkmate::checkNull(pages), checkmate::checkIntegerish(pages, lower = 1))
1947 1925
   checkmate::assertFlag(forceRerun)
1948 1926
   checkmate::assert(checkmate::checkNull(outputFolder), checkmate::checkCharacter(outputFolder, min.chars = 1))
1949 1927
   checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE))
... ...
@@ -1975,52 +1953,59 @@ plotGeneralGraphStats <- function(GRN, outputFolder = NULL, basenameOutput = NUL
1975 1953
                         plot.title = element_text(hjust = 0.5))
1976 1954
     
1977 1955
     
1978
-    gVertexDist = ggplot(totalVerteces, aes(x="", y=Count, fill=Class)) + geom_bar(stat="identity") +
1979
-      coord_polar("y", start=0) +
1980
-      scale_fill_manual(values = c("#3B9AB2", "#F21A00", "#E1AF00")) +
1981
-      geom_text(aes(label = paste0(round(Count/sum(Count) *100, digits = 1), "%")),
1982
-                position = position_stack(vjust = 0.5)) +
1983
-      theme_plots +
1984
-      ggtitle(paste0("Vertices (n=", sum(totalVerteces$Count), ")"))
1985
-    
1986
-    geneDist = as.data.frame(table(droplevels(GRN@connections$all.filtered$`0`$gene.type)))
1987
-    if (nrow(geneDist) == 0) {
1988
-      gGeneDist = 
1989
-        ggplot() + 
1990
-        annotate("text", x = 4, y = 25, size=5, color = "red", label = "Nothing to plot:\nNo genes found") + 
1991
-        theme_void()
1992
-      
1993
-      
1994
-      message = "No genes found in the GRN object. Make sure the filtered connections contain also genes."
1995