Browse code

Small changes

Christian Arnold authored on 27/09/2022 10:47:15
Showing8 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: GRaNIE
2 2
 Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data
3
-Version: 1.1.14
3
+Version: 1.1.15
4 4
 Encoding: UTF-8
5 5
 Authors@R: c(person("Christian", "Arnold", email =
6 6
         "chrarnold@web.de", role = c("cre","aut")),
... ...
@@ -3,7 +3,7 @@
3 3
 #' Genetic variants associated with diseases often affect non-coding regions, thus likely having a regulatory role. To understand the effects of genetic variants in these regulatory regions, identifying genes that are modulated by specific regulatory elements (REs) is crucial. The effect of gene regulatory elements, such as enhancers, is often cell-type specific, likely because the combinations of transcription factors (TFs) that are regulating a given enhancer have celltype specific activity. This TF activity can be quantified with existing tools such as \code{diffTF} and captures differences in binding of a TF in open chromatin regions. Collectively, this forms a gene regulatory network (\code{eGRN}) with cell-type and data-specific TF-RE and RE-gene links. Here, we reconstruct such a \code{eGRN} using bulk RNAseq and open chromatin (e.g., using ATACseq or ChIPseq for open chromatin marks) and optionally TF activity data. Our network contains different types of links, connecting TFs to regulatory elements, the latter of which is connected to genes in the vicinity or within the same chromatin domain (TAD). We use a statistical framework to assign empirical FDRs and weights to all links using a permutation-based approach.
4 4
 #' 
5 5
 #' @section Package functions:
6
-#' See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE/articles/} for all project-related information.
6
+#' See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE} for all project-related information.
7 7
 #' 
8 8
 #' @section GRN object:
9 9
 #' The \code{GRaNIE} package works with \code{GRN} objects. See \code{\linkS4class{GRN}} for details.
... ...
@@ -39,8 +39,14 @@ initializeGRN <- function(objectMetadata = list(),
39 39
   .checkAndLoadPackagesGenomeAssembly(genomeAssembly)
40 40
   
41 41
   # Create the folder first if not yet existing
42
+  checkmate::assertCharacter(outputFolder, min.chars = 1, len = 1)
42 43
   if (!dir.exists(outputFolder)) {
43
-    dir.create(outputFolder)
44
+    res = dir.create(outputFolder)
45
+    if (!res) {
46
+        message = paste0("Could not create the output directory ", outputFolder, ". Check the path /and/or access rights.")
47
+        .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
48
+    }
49
+    checkmate::assertDirectoryExists(outputFolder, access = "w")
44 50
   }
45 51
   
46 52
   # Create an absolute path out of the given outputFolder now that it exists
... ...
@@ -109,6 +115,9 @@ initializeGRN <- function(objectMetadata = list(),
109 115
   
110 116
   futile.logger::flog.info(paste0("Empty GRN object created successfully. Type the object name (e.g., GRN) to retrieve summary information about it at any time."))
111 117
   
118
+  futile.logger::flog.info(paste0(" Default output folder: ", GRN@config$directories$outputRoot))
119
+  futile.logger::flog.info(paste0(" Genome assembly: ", genomeAssembly))
120
+  
112 121
   .printExecutionTime(start, prefix = "")
113 122
   
114 123
   GRN
... ...
@@ -187,11 +196,17 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
187 196
     # Normalize ID column names
188 197
     if (idColumn_peaks != "peakID") {
189 198
       counts_peaks = dplyr::rename(counts_peaks, peakID = !!(idColumn_peaks))
199
+      idColumn_peaks ="peakID"
190 200
     }
191 201
     if (idColumn_RNA != "ENSEMBL") {
192 202
       counts_rna = dplyr::rename(counts_rna, ENSEMBL = !!(idColumn_RNA))
203
+      idColumn_RNA = "ENSEMBL"
193 204
     }
194
-    
205
+      
206
+    # Check existence of correct ID column now
207
+    checkmate::assertSubset(idColumn_peaks, colnames(counts_peaks))
208
+    checkmate::assertSubset(idColumn_RNA, colnames(counts_rna))
209
+
195 210
     # Check ID columns for missing values and remove
196 211
     rna_missing_ID =  which(is.na(counts_rna$ENSEMBL))
197 212
     if (length(rna_missing_ID) > 0) {
... ...
@@ -257,8 +272,11 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
257 272
     GRN@data$peaks$counts_norm = data.l[["peaks"]] %>% dplyr::mutate(isFiltered = FALSE)
258 273
     countsRNA.norm.df = data.l[["RNA"]]  %>% dplyr::mutate(isFiltered = FALSE)
259 274
     
275
+    futile.logger::flog.info(paste0( "Final dimensions of data:"))
276
+    futile.logger::flog.info(paste0( " RNA  : ", nrow(countsRNA.norm.df)  , " x ", ncol(countsRNA.norm.df)   - 2, " (rows x columns)"))
277
+    futile.logger::flog.info(paste0( " peaks: ", nrow(countsPeaks.norm.df), " x ", ncol(countsPeaks.norm.df) - 1, " (rows x columns)"))
260 278
     # Create permutations for RNA
261
-    futile.logger::flog.info(paste0( "Produce ", .getMaxPermutation(GRN), " permutations of RNA-counts"))
279
+    futile.logger::flog.info(paste0( "Generate ", .getMaxPermutation(GRN), " permutations of RNA-counts"))
262 280
     GRN@data$RNA$counts_norm.l = .shuffleColumns(countsRNA.norm.df, .getMaxPermutation(GRN), returnUnshuffled = TRUE, returnAsList = TRUE)
263 281
     
264 282
     if (!is.null(sampleMetadata)) {
... ...
@@ -341,7 +359,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
341 359
     
342 360
     overlappingPeaks = which(GenomicRanges::countOverlaps(consensus.gr ,consensus.gr) >1)
343 361
     
344
-    if (length(overlappingPeaks) > 0){
362
+    if (length(overlappingPeaks) > 0) {
345 363
       
346 364
       ids = (consensus.gr[overlappingPeaks] %>% as.data.frame())$peakID
347 365
       
... ...
@@ -574,11 +592,12 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
574 592
 }
575 593
 
576 594
 .populateGeneAnnotation <- function (GRN) {
577
-  
578
-  futile.logger::flog.info(paste0(" Calculate statistics for each gene (mean and CV)"))
579
-  
595
+
596
+
580 597
   countsRNA.clean  = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE)
581 598
   
599
+  futile.logger::flog.info(paste0(" Calculate statistics for each of the ", nrow(countsRNA.clean), " genes that were provided with the RNA-seq data (mean and CV)"))
600
+  
582 601
   countsRNA.m = as.matrix(dplyr::select(countsRNA.clean, -.data$ENSEMBL))
583 602
   
584 603
   rowMeans_rna = rowMeans(countsRNA.m)
... ...
@@ -591,7 +610,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
591 610
                                 gene.mean = rowMeans_rna, 
592 611
                                 gene.median = rowMedians_rna, 
593 612
                                 gene.CV = CV_rna) %>%
594
-    dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>%
613
+    dplyr::left_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>%
595 614
     dplyr::mutate(gene.type = forcats::fct_explicit_na(.data$gene.type, na_level = "unknown/missing"))
596 615
   
597 616
   GRN@annotation$genes = metadata_rna
... ...
@@ -704,6 +723,8 @@ filterData <- function (GRN,
704 723
   checkmate::assertNumber(maxNormalizedMean_peaks, lower = minNormalizedMean_peaks , null.ok = TRUE)
705 724
   checkmate::assertNumber(maxNormalizedMeanRNA, lower = minNormalizedMeanRNA, null.ok = TRUE)
706 725
   checkmate::assertCharacter(chrToKeep_peaks, min.len = 1, any.missing = FALSE, null.ok = TRUE)
726
+  checkmate::assertSubset(chrToKeep_peaks, GRN@data$peaks$consensusPeaks %>% dplyr::pull(.data$chr) %>% unique() %>% as.character())
727
+  
707 728
   checkmate::assertIntegerish(minSize_peaks, lower = 1, null.ok = TRUE)
708 729
   checkmate::assertIntegerish(maxSize_peaks, lower = dplyr::if_else(is.null(minSize_peaks), 1, minSize_peaks), null.ok = TRUE)
709 730
   checkmate::assertNumber(minCV_peaks, lower = 0, null.ok = TRUE)
... ...
@@ -739,6 +760,11 @@ filterData <- function (GRN,
739 760
     futile.logger::flog.info(paste0("Collectively, filter ", nPeaksBefore -length(peaks_toKeep), " out of ", nPeaksBefore, " peaks."))
740 761
     futile.logger::flog.info(paste0("Number of remaining peaks: ", length(peaks_toKeep)))
741 762
     
763
+    if (length(peaks_toKeep) < 1000) {
764
+        message = paste0("Too few peaks (", length(peaks_toKeep), ") remain after filtering. At least 1000 peaks must remain. Adjust the filtering settings.")
765
+        .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
766
+    }
767
+    
742 768
     GRN@data$peaks$consensusPeaks$isFiltered  = ! GRN@data$peaks$consensusPeaks$peakID  %in% peaks_toKeep
743 769
     #GRN@data$peaks$counts_raw$isFiltered = ! GRN@data$peaks$counts_raw$peakID  %in% peaks_toKeep
744 770
     GRN@data$peaks$counts_norm$isFiltered = ! GRN@data$peaks$counts_norm$peakID  %in% peaks_toKeep
... ...
@@ -757,7 +783,12 @@ filterData <- function (GRN,
757 783
                                     minMean = minNormalizedMeanRNA, maxMean = maxNormalizedMeanRNA, 
758 784
                                     minCV = minCV_genes, maxCV = maxCV_genes) 
759 785
     
760
-    futile.logger::flog.info(paste0(" Number of rows in total: ", nrow(GRN@data$RNA$counts_norm.l[["0"]])))
786
+    
787
+    if (length(genes.CV) < 100) {
788
+        message = paste0("Too few genes (", length(genes.CV), ") remain after filtering. At least 100 genes must remain. Adjust the filtering settings.")
789
+        .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
790
+    }
791
+    
761 792
     for (permutationCur in c(0)) {
762 793
       permIndex = as.character(permutationCur)
763 794
       rowMeans = rowMeans(dplyr::select(GRN@data$RNA$counts_norm.l[[permIndex]], -.data$ENSEMBL))
... ...
@@ -768,8 +799,6 @@ filterData <- function (GRN,
768 799
     # Raw counts are left untouched and filtered where needed only
769 800
     futile.logger::flog.info(paste0(" Flagged ", nRowsFlagged, " rows because the row mean was smaller than ", minNormalizedMeanRNA))
770 801
     
771
-    # TODO: Filter genes by CV
772
-    
773 802
     GRN@config$isFiltered = TRUE
774 803
   } 
775 804
   
... ...
@@ -1093,7 +1122,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1093 1122
   if (is.null(GRN@data$TFs$TF_peak_overlap) | forceRerun) {
1094 1123
     
1095 1124
     
1096
-    futile.logger::flog.info(paste0("Overlap peaks and TFBS using ", nCores, " cores. This may take a few minutes..."))
1125
+    futile.logger::flog.info(paste0("Overlap peaks and TFBS using ", nCores, " cores. This may take a while, particularly if the number of samples is large..."))
1097 1126
     
1098 1127
     genomeAssembly = GRN@config$parameters$genomeAssembly
1099 1128
     seqlengths = .getChrLengths(genomeAssembly)
... ...
@@ -1254,17 +1283,25 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1254 1283
   
1255 1284
   start = Sys.time()
1256 1285
   
1286
+  # Set the column name to just ENSEMBL to avoid column mismatch issues
1287
+  HOCOMOCO_mapping$ENSEMBL = HOCOMOCO_mapping$TF.ENSEMBL
1288
+  
1257 1289
   # Filter to only the TFs
1258 1290
   # In addition, the no of TF because multiple TFs can map to the same gene/ ENSEMBL ID
1259 1291
   # Also filter 0 count genes because they otherwise cause errors downstream
1260 1292
   rowSums = rowSums(dplyr::select(matrix1, -.data$ENSEMBL))
1261 1293
   
1262 1294
   # Keep only Ensembl IDs from TFs we have data from
1263
-  matrix1.norm.TFs.df = dplyr::filter(matrix1, .data$ENSEMBL %in% HOCOMOCO_mapping$ENSEMBL, rowSums != 0)
1295
+  matrix1.norm.TFs.df = dplyr::filter(matrix1, .data$ENSEMBL %in% HOCOMOCO_mapping$TF.ENSEMBL, rowSums != 0)
1296
+  
1297
+  nFiltered1 = dplyr::filter(matrix1, ! .data$ENSEMBL %in% HOCOMOCO_mapping$TF.ENSEMBL) %>% nrow()
1298
+  nFiltered2 = dplyr::filter(matrix1, rowSums == 0) %>% nrow()
1264 1299
   
1265 1300
   diff = nrow(matrix1) - nrow(matrix1.norm.TFs.df)
1266 1301
   if (diff > 0) {
1267
-    message = paste0(whitespacePrefix, "Retain ", nrow(matrix1.norm.TFs.df), " rows from TF/gene data out of ", nrow(matrix1), " (filter non-TF genes and TF genes with 0 counts throughout and keep only unique ENSEMBL IDs).")
1302
+    message = paste0(whitespacePrefix, "Retain ", nrow(matrix1.norm.TFs.df), " unique genes from TF/gene data out of ", nrow(matrix1), 
1303
+                     " (filter ",  nFiltered1, " non-TF genes and ", nFiltered2, 
1304
+                     " TF genes with 0 counts throughout).")
1268 1305
     futile.logger::flog.info(message)
1269 1306
   }
1270 1307
   
... ...
@@ -1273,7 +1310,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1273 1310
     .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
1274 1311
   }
1275 1312
   
1276
-  HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, .data$ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL)
1313
+  HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, .data$TF.ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL)
1277 1314
   futile.logger::flog.info(paste0(whitespacePrefix, "Correlate TF/gene data for ", nrow(matrix1.norm.TFs.df), " unique Ensembl IDs (TFs) and peak counts for ", nrow(matrix_peaks), " peaks."))
1278 1315
   futile.logger::flog.info(paste0(whitespacePrefix, "Note: For subsequent steps, the same gene may be associated with multiple TF, depending on the translation table."))
1279 1316
   # Correlate TF gene counts with peak counts 
... ...
@@ -1301,10 +1338,10 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1301 1338
   # Reorder to make sure the order is the same. Due to the duplication ID issue, the number of columns may increase after the column selection
1302 1339
   
1303 1340
   # Some columns may be removed here due to zero standard deviation
1304
-  HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(.data$ENSEMBL %in% colnames(sort.cor.m))
1341
+  HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(.data$TF.ENSEMBL %in% colnames(sort.cor.m))
1305 1342
   
1306 1343
   sort.cor.m = sort.cor.m[,as.character(HOCOMOCO_mapping.exp.filt$ENSEMBL)] 
1307
-  colnames(sort.cor.m) = as.character(HOCOMOCO_mapping.exp.filt$HOCOID)
1344
+  colnames(sort.cor.m) = as.character(HOCOMOCO_mapping.exp.filt$TF.HOCOID)
1308 1345
   
1309 1346
   .printExecutionTime(start, prefix = whitespacePrefix)
1310 1347
   sort.cor.m
... ...
@@ -2007,14 +2044,16 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2007 2044
                                      corMethod,
2008 2045
                                      whitespacePrefix = "  ")
2009 2046
     
2010
-    # TODO:Check here for the TFs
2047
+    # TODO:Check here for the TFs BMAL1.0.A
2011 2048
     allTF = intersect(colnames(peak_TF_overlap.df), colnames(peaksCor.m))
2049
+    checkmate::assertIntegerish(length(allTF), lower = 1)
2050
+    
2012 2051
     futile.logger::flog.info(paste0(" Run FDR calculations for ", length(allTF), " TFs for which TFBS predictions and ",
2013 2052
                                     connectionTypeCur, " data for the corresponding gene are available."))
2014 2053
     if (length(allTF) < ncol(peak_TF_overlap.df) | length(allTF) < ncol(peaksCor.m)) {
2015 2054
       
2016 2055
       TF_missing = setdiff(colnames(peak_TF_overlap.df), colnames(peaksCor.m))
2017
-      if (length(TF_missing) > 0) futile.logger::flog.info(paste0("  Skip the following ", length(TF_missing), " TF due to missing data: ", paste0(TF_missing, collapse = ",")))
2056
+      if (length(TF_missing) > 0) futile.logger::flog.info(paste0("  Skip the following ", length(TF_missing), " TF due to missing dataor because they are marked as filtered: ", paste0(TF_missing, collapse = ",")))
2018 2057
     }
2019 2058
     
2020 2059
     stopifnot(nrow(peaksCor.m) == nrow(peak_TF_overlap.df))
... ...
@@ -160,7 +160,9 @@
160 160
     countsRNA.df  = dplyr::select(countsRNA,  tidyselect::all_of(c(idColumn_RNA,  sharedColumns)))
161 161
     countsPeaks.df = dplyr::select(countsPeaks, tidyselect::all_of(c(idColumn_peaks, sharedColumns)))
162 162
     
163
-    futile.logger::flog.info(paste0(" ", length(sharedColumns), " samples (", paste0(sharedColumns, collapse = ","), ") are shared between the peaks and RNA-Seq data"))
163
+    futile.logger::flog.info(paste0(" ", length(sharedColumns), " samples (", 
164
+                                    dplyr::if_else(length(sharedColumns) > 100, "names omitted here", paste0(sharedColumns, collapse = ",")), 
165
+                                    ") are shared between the peaks and RNA-Seq data"))
164 166
     
165 167
     notIntersecting_peaks = setdiff(colnames(countsPeaks),c(sharedColumns,idColumn_peaks))
166 168
     notIntersecting_RNA  = setdiff(colnames(countsRNA),c(sharedColumns,idColumn_RNA))
... ...
@@ -998,6 +998,12 @@ calculateCommunitiesEnrichment <- function(GRN,
998 998
       .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
999 999
   }
1000 1000
   
1001
+  if (length(selCommunities) < length(communities)) {
1002
+      missingCommunities = setdiff(communities, selCommunities)
1003
+      message = paste0("Some of the requested communities (", paste0(missingCommunities , collapse = ","), ") were not found. Only the following communities are available: ", paste0(selCommunities, collapse = ","))
1004
+      .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
1005
+  }
1006
+  
1001 1007
   selCommunities
1002 1008
   
1003 1009
 }
... ...
@@ -326,7 +326,9 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
326 326
           plot(g)
327 327
           
328 328
         }, error = function(e) {
329
-          futile.logger::flog.warn(paste0(" Could not plot PCA with variable ", varCur))
329
+          message = paste0(" Could not plot PCA with variable ", varCur)
330
+          .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)   
331
+            
330 332
           plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n')
331 333
           message = paste0(" Could not plot PCA with variable ", varCur)
332 334
           text(x = 0.5, y = 0.5, message, cex = 1.6, col = "red")
... ...
@@ -493,7 +495,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL,
493 495
 #' @template basenameOutput
494 496
 #' @template plotDetails
495 497
 #' @param dataType Character vector. One of, or both of, \code{"real"} or \code{"permuted"}. For which data type, real or permuted data, to produce the diagnostic plots?
496
-#' @param nTFMax \code{NULL} or Integer. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10)
498
+#' @param nTFMax \code{NULL} or Integer > 0. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10)
497 499
 #' @template plotAsPDF
498 500
 #' @template pdf_width
499 501
 #' @param pdf_height_base  Number. Default 8. Base height of the PDF, in cm, per connection type. The total height is automatically determined based on the number of connection types that are found in the object (e.g., expression or TF activity). For example, when two connection types are found, the base height is multiplied by 2.
... ...
@@ -524,7 +526,7 @@ plotDiagnosticPlots_TFPeaks <- function(GRN,
524 526
   checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE))
525 527
   checkmate::assertFlag(plotDetails)
526 528
   checkmate::assertSubset(dataType, c("real", "permuted"), empty.ok = FALSE)
527
-  checkmate::assert(checkmate::checkNull(nTFMax), checkmate::checkIntegerish(nTFMax))
529
+  checkmate::assert(checkmate::checkNull(nTFMax), checkmate::checkIntegerish(nTFMax, lower = 1))
528 530
   checkmate::assertFlag(plotAsPDF)
529 531
   checkmate::assertNumeric(pdf_width, lower = 5, upper = 99)
530 532
   checkmate::assertNumeric(pdf_height_base, lower = 5, upper = 99)
... ...
@@ -917,6 +919,10 @@ plotDiagnosticPlots_peakGene <- function(GRN,
917 919
   checkmate::assert(checkmate::checkNull(outputFolder), checkmate::checkCharacter(outputFolder, min.chars = 1))
918 920
   checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE))
919 921
   checkmate::assertList(gene.types, any.missing = FALSE, min.len = 1, types = "character")
922
+  for (geneTypesCur in gene.types) {
923
+      checkmate::assertSubset(geneTypesCur, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE)
924
+  }  
925
+
920 926
   checkmate::assertFlag(useFiltered)
921 927
   checkmate::assertFlag(plotDetails)
922 928
   checkmate::assertFlag(plotPerTF)
... ...
@@ -1122,6 +1128,9 @@ plotDiagnosticPlots_peakGene <- function(GRN,
1122 1128
         filenameCur = paste0(fileBase, paste0(geneTypesSelected, collapse = "+"), filteredStr, ".pdf")
1123 1129
         .checkOutputFile(filenameCur)
1124 1130
         grDevices::pdf(file = filenameCur, width = pdf_width, height = pdf_height)
1131
+        
1132
+        futile.logger::flog.info(paste0(" Plotting to file ", filenameCur))
1133
+        
1125 1134
       }
1126 1135
       
1127 1136
       if (plotPerTF) {
... ...
@@ -2342,8 +2351,8 @@ plotGeneralEnrichment <- function(GRN, outputFolder = NULL, basenameOutput = NUL
2342 2351
 #' @inheritParams plotCommunitiesEnrichment
2343 2352
 #' @param communities Numeric vector. Default \code{seq_len(10)}. Depending on what was specified in the \code{display} parameter, this parameter would indicate either the rank or the label of the communities to be plotted. i.e. for \code{communities = c(1,4)}, if \code{display = "byRank"} the results for the first and fourth largest communities will be plotted. if \code{display = "byLabel"}, the results for the communities labeled \code{"1"}, and \code{"4"} will be plotted. If set to \code{NULL}, all communities will be plotted
2344 2353
 #' @template forceRerun
2345
-#' @param topnGenes Integer. Default 20. Number of genes to plot, sorted by their rank or label.
2346
-#' @param topnTFs Integer. Default 20. Number of TFs to plot, sorted by their rank or label.
2354
+#' @param topnGenes Integer > 0. Default 20. Number of genes to plot, sorted by their rank or label.
2355
+#' @param topnTFs Integer > 0. Default 20. Number of TFs to plot, sorted by their rank or label.
2347 2356
 #' @return The same \code{\linkS4class{GRN}} object, without modifications.
2348 2357
 #' @seealso \code{\link{plotGeneralGraphStats}}
2349 2358
 #' @seealso \code{\link{calculateCommunitiesStats}}
... ...
@@ -2354,7 +2363,7 @@ plotGeneralEnrichment <- function(GRN, outputFolder = NULL, basenameOutput = NUL
2354 2363
 #' GRN = plotCommunitiesStats(GRN, plotAsPDF = FALSE, pages = 1)
2355 2364
 #' @export
2356 2365
 plotCommunitiesStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL, 
2357
-                                 display = "byRank", communities = seq_len(10), 
2366
+                                 display = "byRank", communities = seq_len(5), 
2358 2367
                                  topnGenes = 20, topnTFs = 20, 
2359 2368
                                  plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, pages = NULL,
2360 2369
                                  forceRerun = FALSE){
... ...
@@ -2368,8 +2377,8 @@ plotCommunitiesStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL
2368 2377
   checkmate::assert(checkmate::checkNull(outputFolder), checkmate::checkCharacter(outputFolder, min.chars = 1))
2369 2378
   checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE))
2370 2379
   
2371
-  checkmate::assertIntegerish(topnGenes)
2372
-  checkmate::assertIntegerish(topnTFs)
2380
+  checkmate::assertIntegerish(topnGenes, lower = 1)
2381
+  checkmate::assertIntegerish(topnTFs, lower = 1)
2373 2382
   checkmate::assertChoice(display , c("byRank", "byLabel"))
2374 2383
   checkmate::assert(checkmate::checkNull(communities), checkmate::checkNumeric(communities, lower = 1, any.missing = FALSE, min.len = 1))
2375 2384
   checkmate::assertFlag(plotAsPDF)
... ...
@@ -2542,8 +2551,8 @@ plotCommunitiesStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL
2542 2551
 #' @inheritParams plotGeneralEnrichment
2543 2552
 #' @param display Character. Default \code{"byRank"}. One of: \code{"byRank"}, \code{"byLabel"}. Specify whether the communities will be displayed based on their rank, where the largest community (with most vertices) would have a rank of 1, or by their label. Note that the label is independent of the rank.
2544 2553
 #' @param communities \code{NULL} or numeric vector. Default \code{NULL}. If set to \code{NULL}, the default, all communities enrichments that have been calculated before are plotted. If a numeric vector is specified: Depending on what was specified in the \code{display} parameter, this parameter indicates either the rank or the label of the communities to be plotted. i.e. for \code{communities = c(1,4)}, if \code{display = "byRank"} the results for the first and fourth largest communities are plotted. if \code{display = "byLabel"}, the results for the communities labeled \code{"1"}, and \code{"4"} are plotted. 
2545
-#' @param nSignificant Numeric >0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes. 
2546
-#' @param nID Numeric >0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.
2554
+#' @param nSignificant Numeric > 0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes. 
2555
+#' @param nID Numeric > 0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.
2547 2556
 #' @template maxWidth_nchar_plot
2548 2557
 #' @return  The same \code{\linkS4class{GRN}} object, without modifications.
2549 2558
 #' @seealso \code{\link{plotGeneralEnrichment}}
... ...
@@ -2921,8 +2930,8 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL
2921 2930
   checkmate::assert(checkmate::checkNull(n), checkmate::checkNumeric(n))
2922 2931
   checkmate::assertNumeric(p, lower = 0, upper = 1)
2923 2932
   checkmate::assertNumeric(topn_pvalue, lower = 1)
2924
-  checkmate::assertNumeric(nSignificant, lower = 0)
2925
-  checkmate::assertNumeric(nID, lower = 0)
2933
+  checkmate::assertNumeric(nSignificant, lower = 1)
2934
+  checkmate::assertNumeric(nID, lower = 1)
2926 2935
   checkmate::assertFlag(plotAsPDF)
2927 2936
   checkmate::assertNumeric(pdf_width, lower = 5, upper = 99)
2928 2937
   checkmate::assertNumeric(pdf_height, lower = 5, upper = 99)
... ...
@@ -2938,13 +2947,16 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL
2938 2947
   outputFolder = .checkOutputFolder(GRN, outputFolder)
2939 2948
   
2940 2949
   
2941
-  if (rankType == "custom"){
2950
+  if (rankType == "custom") {
2942 2951
     if(is.null(TF.names)){
2943
-      futile.logger::flog.error("To plot the GO enrichment for a custom set of TFs, you must provide the TF names in the 'TF.names' parameter.")
2952
+      message = "To plot the GO enrichment for a custom set of TFs, you must provide the TF names in the 'TF.names' parameter."
2953
+      .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
2944 2954
     }
2945 2955
     wrongTFs = setdiff(TF.names, unique(GRN@connections$all.filtered$`0`$TF.name))
2946 2956
     if (length(wrongTFs)>0){
2947
-      futile.logger::flog.warn(paste0("The TFs ",  paste0(wrongTFs, collapse = " + "), " are not in the filtered GRN. They will be ommited from the results."))
2957
+       message = paste0("The TFs ",  paste0(wrongTFs, collapse = " + "), " are not in the filtered GRN. They will be ommited from the results.")
2958
+       .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
2959
+
2948 2960
     }
2949 2961
     TFset = setdiff(TF.names, wrongTFs) 
2950 2962
   } else{ #rankType = "degree"
... ...
@@ -3467,7 +3479,8 @@ visualizeGRN <- function(GRN, outputFolder = NULL,  basenameOutput = NULL, plotA
3467 3479
     
3468 3480
     if (nrow(grn.merged) == 0) {
3469 3481
         
3470
-        futile.logger::flog.warn(paste0("No rows left in the GRN. Creating empty plot."))
3482
+        message = paste0("No rows left in the GRN. Creating empty plot.")
3483
+        .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
3471 3484
         
3472 3485
         plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n', main = title)
3473 3486
         message = paste0(title, "\n\nThe GRN has no edges that pass the filter criteria.")
... ...
@@ -43,9 +43,9 @@ As they are listed under \code{Suggests}, they may not yet be installed, and the
43 43
 
44 44
 \item{display}{Character. Default \code{"byRank"}. One of: \code{"byRank"}, \code{"byLabel"}. Specify whether the communities will be displayed based on their rank, where the largest community (with most vertices) would have a rank of 1, or by their label. Note that the label is independent of the rank.}
45 45
 
46
-\item{topnGenes}{Integer. Default 20. Number of genes to plot, sorted by their rank or label.}
46
+\item{topnGenes}{Integer > 0. Default 20. Number of genes to plot, sorted by their rank or label.}
47 47
 
48
-\item{topnTFs}{Integer. Default 20. Number of TFs to plot, sorted by their rank or label.}
48
+\item{topnTFs}{Integer > 0. Default 20. Number of TFs to plot, sorted by their rank or label.}
49 49
 
50 50
 \item{maxWidth_nchar_plot}{Integer (>=10). Default 50. Maximum number of characters for a term before it is truncated.}
51 51
 
... ...
@@ -9,7 +9,7 @@ plotCommunitiesStats(
9 9
   outputFolder = NULL,
10 10
   basenameOutput = NULL,
11 11
   display = "byRank",
12
-  communities = seq_len(10),
12
+  communities = seq_len(5),
13 13
   topnGenes = 20,
14 14
   topnTFs = 20,
15 15
   plotAsPDF = TRUE,
... ...
@@ -30,9 +30,9 @@ plotCommunitiesStats(
30 30
 
31 31
 \item{communities}{Numeric vector. Default \code{seq_len(10)}. Depending on what was specified in the \code{display} parameter, this parameter would indicate either the rank or the label of the communities to be plotted. i.e. for \code{communities = c(1,4)}, if \code{display = "byRank"} the results for the first and fourth largest communities will be plotted. if \code{display = "byLabel"}, the results for the communities labeled \code{"1"}, and \code{"4"} will be plotted. If set to \code{NULL}, all communities will be plotted}
32 32
 
33
-\item{topnGenes}{Integer. Default 20. Number of genes to plot, sorted by their rank or label.}
33
+\item{topnGenes}{Integer > 0. Default 20. Number of genes to plot, sorted by their rank or label.}
34 34
 
35
-\item{topnTFs}{Integer. Default 20. Number of TFs to plot, sorted by their rank or label.}
35
+\item{topnTFs}{Integer > 0. Default 20. Number of TFs to plot, sorted by their rank or label.}
36 36
 
37 37
 \item{plotAsPDF}{\code{TRUE} or \code{FALSE}. Default \code{TRUE}.Should the plots be printed to a PDF file? If set to \code{TRUE}, a PDF file is generated, the name of which depends on the value of \code{basenameOutput}. If set to \code{FALSE}, all plots are printed to the currently active device. Note that most functions print more than one plot, which means you may only see the last plot depending on your active graphics device.}
38 38