Browse code

Bugfix

Christian Arnold authored on 04/10/2022 13:27:32
Showing14 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: GRaNIE
2 2
 Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data
3
-Version: 1.1.15
3
+Version: 1.1.16
4 4
 Encoding: UTF-8
5 5
 Authors@R: c(person("Christian", "Arnold", email =
6 6
         "chrarnold@web.de", role = c("cre","aut")),
... ...
@@ -119,9 +119,9 @@ setMethod("show",
119 119
             if (!is.null(GRN@data$peaks$consensusPeaks)) {
120 120
               nPeaks_filt  = nPeaks(GRN, filter = TRUE)
121 121
               nPeaks_all   = nPeaks(GRN, filter = FALSE)
122
-              cat(" Number of peaks (filtered, all): ", nPeaks_filt, ", ",  nPeaks_all, "\n", sep = "")
122
+              cat(" # peaks (filtered, all): ", nPeaks_filt, ", ",  nPeaks_all, "\n", sep = "")
123 123
             } else {
124
-              cat (" Number of peaks: No peak data found.\n")
124
+              cat (" # peaks: No peak data found.\n")
125 125
             }
126 126
             
127 127
             if (!is.null(GRN@data$RNA$counts_orig)) {
... ...
@@ -135,16 +135,30 @@ setMethod("show",
135 135
                 
136 136
               }
137 137
              
138
-              cat(" Number of genes (filtered, all): ", nGenes_filt, ", ",  nGenes_all, "\n", sep = "")
138
+              cat(" # genes (filtered, all): ", nGenes_filt, ", ",  nGenes_all, "\n", sep = "")
139 139
             } else {
140
-              cat (" Number of genes: No RNA-seq data found.\n")
140
+              cat (" # genes: No RNA-seq data found.\n")
141 141
             }
142 142
             
143
+            if (!is.null(GRN@data$RNA$counts_orig) & !is.null(GRN@data$peaks$consensusPeaks)) {
144
+                
145
+                cat(" # shared samples: ", nrow(GRN@data$metadata), "\n", sep = "")
146
+            } 
147
+            
148
+            if (!is.null(GRN@annotation$TFs)) {
149
+                
150
+                cat(" # TFs (with expression data): ", nrow(GRN@annotation$TFs), "\n", sep = "")
151
+            } 
152
+            
153
+            
143 154
             cat("Parameters:\n")
144 155
             if (!is.null(GRN@config$metadata$genomeAsembly)) {
145 156
               cat(" Genome assembly: ", GRN@config$parameters$genomeAssembly, "\n")
146 157
             }
147 158
             
159
+            cat(" Output directory: ",  GRN@config$directories$outputRoot, "\n")
160
+           
161
+            
148 162
             cat("Provided metadata:\n")
149 163
             
150 164
             for (nameCur in names(GRN@config$metadata)) {
... ...
@@ -203,7 +217,7 @@ setMethod("show",
203 217
                 df = igraph::vertex.attributes(GRN@graph[["TF_gene"]]$graph)
204 218
                 if (!is.null(df) & "community" %in% names(df)) {
205 219
                     communities = df %>% as.data.frame() %>% dplyr::count(.data$community) %>% dplyr::arrange(dplyr::desc(.data$n))
206
-                    cat("  Communities, sorted by size (n = Number of nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "")
220
+                    cat("  Communities, sorted by size (n = # nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "")
207 221
                 } else {
208 222
                     cat("  None found\n")
209 223
                 }
... ...
@@ -323,7 +323,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
323 323
     rownames(counts_rna)  = counts_rna$ENSEMBL
324 324
     
325 325
     
326
-    
326
+    futile.logger::flog.info("Storing count matrices...")
327 327
     # Store the raw peaks data efficiently as DESeq object only if it contains only integers, otherwise store as normal matrix
328 328
     
329 329
     if (isIntegerMatrix(counts_peaks[, GRN@config$sharedSamples])) {
... ...
@@ -331,8 +331,21 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
331 331
                                                                   colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"),
332 332
                                                                   design = ~1)
333 333
     } else {
334
+        
335
+      # Store as sparse matrix if enough 0s
336
+      rowSample = min(100, nrow(counts_peaks))
337
+      zeroEntries = length(which(counts_peaks[1:rowSample, GRN@config$sharedSamples] %>% unlist(use.names = FALSE)  == 0))
338
+      fractionZero = zeroEntries / (rowSample * length(GRN@config$sharedSamples)) 
339
+      if (fractionZero> 0.1) {
340
+          futile.logger::flog.info(paste0("Storing GRN@data$peaks$counts_orig matrix as sparse matrix because fraction of 0s is > 0.1 (", fractionZero, ")"))
341
+          GRN@data$peaks$counts_orig = .asSparseMatrix(as.matrix(counts_peaks %>% dplyr::select(-.data$peakID)), 
342
+                                                       convertNA_to_zero = FALSE, 
343
+                                                       dimnames = list(counts_peaks$peakID, colnames(GRN@config$sharedSamples)))
344
+      } else {
345
+          GRN@data$peaks$counts_orig = as.matrix(counts_peaks[, GRN@config$sharedSamples])
346
+      }
334 347
       
335
-      GRN@data$peaks$counts_orig = as.matrix(counts_peaks[, GRN@config$sharedSamples])
348
+     
336 349
     }
337 350
     
338 351
     if (isIntegerMatrix(counts_rna[, GRN@config$sharedSamples])) {
... ...
@@ -340,17 +353,30 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
340 353
                                                                 colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"),
341 354
                                                                 design = ~1)   
342 355
     } else {
343
-      GRN@data$RNA$counts_orig = as.matrix(counts_rna[, GRN@config$sharedSamples])
356
+      
357
+      
358
+      # Store as sparse matrix if enough 0s
359
+      rowSample = min(100, nrow(counts_rna))
360
+      zeroEntries = length(which(counts_rna[1:rowSample, GRN@config$sharedSamples] %>% unlist(use.names = FALSE)  == 0))
361
+      fractionZero = zeroEntries / (rowSample * length(GRN@config$sharedSamples)) 
362
+      if (fractionZero> 0.1) {
363
+          futile.logger::flog.info(paste0("Storing GRN@data$RNA$counts_orig matrix as sparse matrix because fraction of 0s is > 0.1 (", fractionZero, ")"))
364
+          GRN@data$RNA$counts_orig = .asSparseMatrix(as.matrix(counts_rna %>% dplyr::select(-.data$ENSEMBL)), 
365
+                                                       convertNA_to_zero = FALSE, 
366
+                                                       dimnames = list(counts_rna$ENSEMBL, colnames(GRN@config$sharedSamples)))
367
+      } else {
368
+          GRN@data$RNA$counts_orig = as.matrix(counts_rna[, GRN@config$sharedSamples])
369
+      }
344 370
     }
345 371
     
346
-    
372
+    futile.logger::flog.info(paste0("Creating consensus peaks and check for overlapping peaks..."))
347 373
     
348 374
     # Consensus peaks
349 375
     GRN@data$peaks$consensusPeaks = .createConsensusPeaksDF(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)) 
350 376
     GRN@data$peaks$consensusPeaks = GRN@data$peaks$consensusPeaks[match(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID, GRN@data$peaks$consensusPeaks$peakID), ]
351 377
     stopifnot(c("chr", "start", "end", "peakID", "isFiltered") %in% colnames(GRN@data$peaks$consensusPeaks))
352 378
     
353
-    futile.logger::flog.info(paste0("Check for overlapping peaks..."))
379
+    
354 380
     
355 381
     # Assume 0-based exclusive format, see https://arnaudceol.wordpress.com/2014/09/18/chromosome-coordinate-systems-0-based-1-based/ and http://genome.ucsc.edu/FAQ/FAQformat.html#format1 for details
356 382
     consensus.gr   = .constructGRanges(GRN@data$peaks$consensusPeaks, seqlengths = .getChrLengths(GRN@config$parameters$genomeAssembly), GRN@config$parameters$genomeAssembly, zeroBased = TRUE)
... ...
@@ -381,6 +407,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
381 407
     }
382 408
     
383 409
   }
410
+  futile.logger::flog.info(paste0("Adding peak and gene annotation..."))
384 411
   
385 412
   # Add peak annotation once
386 413
   GRN = .populatePeakAnnotation(GRN)
... ...
@@ -632,7 +659,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
632 659
   futile.logger::flog.info(paste0("Calculate GC-content for peaks. This may take a while"))
633 660
   start = Sys.time()
634 661
   genomeAssembly = GRN@config$parameters$genomeAssembly
635
-  #TODO: GC content for all peaks
662
+
636 663
   genome = .getGenomeObject(genomeAssembly, type = "BSgenome")
637 664
   
638 665
   # Get peaks as GRanges object
... ...
@@ -1351,7 +1378,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) {
1351 1378
   
1352 1379
   peak_TF_overlapCur.df = .asMatrixFromSparse(GRN@data$TFs$TF_peak_overlap, convertZero_to_NA = FALSE) %>% 
1353 1380
     tibble::as_tibble() %>%
1354
-    dplyr::filter(!.data$isFiltered) %>% 
1381
+    dplyr::filter(!.data$isFiltered) %>%  # Works because 1 / 0 is interpreted here as logical and not 1/0
1355 1382
     dplyr::select(-.data$isFiltered) 
1356 1383
   
1357 1384
   if (perm > 0 & shuffle) {
... ...
@@ -1401,6 +1428,7 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac
1401 1428
     counts.df = getCounts(GRN, type = "peaks", norm = FALSE, permuted = FALSE) %>%
1402 1429
       tibble::as_tibble()
1403 1430
     
1431
+    # TODO replace by getter
1404 1432
     countsPeaks = .normalizeNewDataWrapper(GRN@data$peaks$counts_orig, normalization = normalization)
1405 1433
     
1406 1434
     stopifnot(identical(nrow(countsPeaks), nrow(GRN@data$TFs$TF_peak_overlap)))
... ...
@@ -2044,7 +2072,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2044 2072
                                      corMethod,
2045 2073
                                      whitespacePrefix = "  ")
2046 2074
     
2047
-    # TODO:Check here for the TFs BMAL1.0.A
2075
+
2048 2076
     allTF = intersect(colnames(peak_TF_overlap.df), colnames(peaksCor.m))
2049 2077
     checkmate::assertIntegerish(length(allTF), lower = 1)
2050 2078
     
... ...
@@ -2053,7 +2081,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails
2053 2081
     if (length(allTF) < ncol(peak_TF_overlap.df) | length(allTF) < ncol(peaksCor.m)) {
2054 2082
       
2055 2083
       TF_missing = setdiff(colnames(peak_TF_overlap.df), colnames(peaksCor.m))
2056
-      if (length(TF_missing) > 0) futile.logger::flog.info(paste0("  Skip the following ", length(TF_missing), " TF due to missing dataor because they are marked as filtered: ", paste0(TF_missing, collapse = ",")))
2084
+      if (length(TF_missing) > 0) futile.logger::flog.info(paste0("  Skip the following ", length(TF_missing), " TF due to missing data or because they are marked as filtered: ", paste0(TF_missing, collapse = ",")))
2057 2085
     }
2058 2086
     
2059 2087
     stopifnot(nrow(peaksCor.m) == nrow(peak_TF_overlap.df))
... ...
@@ -3755,8 +3783,8 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress
3755 3783
         }
3756 3784
         
3757 3785
       } else {
3758
-        message =paste0(" Nothing to do, skip.")
3759
-        .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
3786
+        futile.logger::flog.info(paste0(" Nothing to do, skip."))
3787
+       
3760 3788
           
3761 3789
         if (addRobustRegression) {
3762 3790
           res.df = tibble::tribble(~TF.name, ~TF.ENSEMBL, ~gene.ENSEMBL, ~TF_gene.r, ~TF_gene.p_raw, ~TF_gene.p_raw.robust, 
... ...
@@ -4277,7 +4305,9 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl
4277 4305
 #' 
4278 4306
 #' @template GRN 
4279 4307
 #' @param type Character. Either \code{peaks} or \code{rna}. \code{peaks} corresponds to the counts for the open chromatin data, while \code{rna} refers to th RNA-seq counts. If set to \code{rna}, both permuted and non-permuted data can be retrieved, while for \code{peaks}, only the non-permuted one (i.e., 0) can be retrieved.
4280
-#' @param norm Logical. \code{TRUE} or \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?
4308
+#' @param norm Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?
4309
+#' @param asMatrix Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. If set to \code{FALSE}, counts are returned as a data frame with or without an ID column (see \code{includeIDColumn}). If set to \code{TRUE}, counts are returned as a matrix with the ID column as row names.
4310
+#' @param includeIDColumn Logical. \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Only relevant if \code{asMatrix = FALSE}. If set to \code{TRUE}, an explicit ID column is returned (no row names). If set to \code{FALSE}, the IDs are in the row names instead.
4281 4311
 #' @template permuted
4282 4312
 #' @export
4283 4313
 #' @import tibble
... ...
@@ -4287,7 +4317,8 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl
4287 4317
 #' counts.df = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)
4288 4318
 #' @return Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4289 4319
 
4290
-getCounts <- function(GRN, type, norm, permuted = FALSE) {
4320
+# TODO document and implement two new arguments here
4321
+getCounts <- function(GRN, type, norm = FALSE, permuted = FALSE, asMatrix = FALSE, includeIDColumn = TRUE) {
4291 4322
   
4292 4323
   checkmate::assertClass(GRN, "GRN")
4293 4324
   GRN = .addFunctionLogToObject(GRN)     
... ...
@@ -4307,36 +4338,50 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) {
4307 4338
         result = GRN@data$peaks$counts_norm
4308 4339
         
4309 4340
       } else {
4310
-        # Handle case when this is null due to already norm. counts as input
4311
-        countsOrig = GRN@data$peaks$counts_orig
4312
-        if (checkmate::testClass(countsOrig, "DESeqDataSet")) {
4313
-          result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("peakID")
4314
-        } else {
4315
-          result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame()
4341
+            # Handle case when this is null due to already norm. counts as input
4342
+          
4343
+           # TODO: Unify the annotation column 
4344
+            classPeaks = class(GRN@data$peaks$counts_orig)
4345
+            if ("DESeqDataSet" %in% classPeaks) {
4346
+              result = DESeq2::counts(GRN@data$peaks$counts_orig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("peakID")
4347
+            } else if ("matrix" %in% classPeaks) {
4348
+                result = GRN@data$peaks$counts_orig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame()
4349
+            } else if ("dgCMatrix" %in% classPeaks) {
4350
+                result = .asMatrixFromSparse(GRN@data$peaks$counts_orig, convertZero_to_NA = FALSE) %>% dplyr::select(-.data$isFiltered) %>% as.data.frame()
4351
+            } else {
4352
+                message = paste0("Unsupported class for GRN@data$peaks$counts_orig. Contact the authors.")
4353
+                .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4354
+            }
4355
+            
4316 4356
         }
4317
-        
4318
-      }
4319
-      
4357
+
4320 4358
       
4321 4359
     } else if (type == "rna") {
4322 4360
       
4323 4361
       if (norm) {
4324
-        result = GRN@data$RNA$counts_norm.l[[permIndex]]
4362
+            result = GRN@data$RNA$counts_norm.l[[permIndex]]
4325 4363
         
4326 4364
       } else {
4327
-        
4328
-        # Handle case when this is null due to already norm. counts as input
4329
-        countsOrig = GRN@data$RNA$counts_orig
4330
-        if (checkmate::testClass(countsOrig, "DESeqDataSet")) {
4331
-          result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("ENSEMBL")
4332
-        } else {
4333
-          result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame()
4334
-          # TODO: isFiltered column?
4335
-        }
4365
+            # TODO: Unify the annotation column 
4366
+            classRNA = class(GRN@data$RNA$counts_orig)
4367
+            if ("DESeqDataSet" %in% classRNA) {
4368
+                result = DESeq2::counts(GRN@data$RNA$counts_orig, norm = FALSE) %>% 
4369
+                    as.data.frame() %>% tibble::rownames_to_column("ENSEMBL")
4370
+            } else if ("matrix" %in% classRNA) {
4371
+                result = GRN@data$RNA$counts_orig %>% 
4372
+                    dplyr::select(-.data$isFiltered) %>% as.data.frame()
4373
+            } else if ("dgCMatrix" %in% classRNA) {
4374
+                result = .asMatrixFromSparse(GRN@data$RNA$counts_orig, convertZero_to_NA = FALSE) %>% 
4375
+                    dplyr::select(-.data$isFiltered) %>% as.data.frame() %>% tibble::rownames_to_column("ENSEMBL")
4376
+            } else {
4377
+                message = paste0("Unsupported class for GRN@data$RNA$counts_orig. Contact the authors.")
4378
+                .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4379
+            }
4336 4380
         
4337 4381
       }
4338
-      
4382
+        
4339 4383
     }
4384
+      
4340 4385
   } else { # permutation 1
4341 4386
     
4342 4387
     if (type == "peaks") {
... ...
@@ -4540,7 +4585,7 @@ getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE,
4540 4585
     }
4541 4586
   } else {
4542 4587
     
4543
-    # TODO: Re-create the output folder here nd adjust to the OS-specific path separator, do not rely on what is stored in the object
4588
+    #  Re-create the output folder here and adjust to the OS-specific path separator, do not rely on what is stored in the object
4544 4589
     if (.Platform$OS.type == "windows") {
4545 4590
       GRN@config$directories$output_plots = gsub('/', ('\\'), GRN@config$directories$output_plots, fixed = TRUE)
4546 4591
     } else {
... ...
@@ -627,14 +627,21 @@
627 627
 
628 628
 
629 629
 isIntegerMatrix <- function(df) {
630
-  
631
-  res = all.equal(unlist(df), as.integer(unlist(df)), check.attributes = FALSE)
632
-  
633
-  if (is.logical(res)) {
634
-    return(TRUE)
630
+    
631
+  # Quick test first: test only first row.
632
+  res = all.equal(unlist(df[1,]), as.integer(unlist(df[1,])), check.attributes = FALSE)  
633
+  if (!is.logical(res)) {
634
+      return(FALSE)
635 635
   } else {
636
-    return(FALSE)
636
+      res = all.equal(unlist(df), as.integer(unlist(df)), check.attributes = FALSE)
637
+      if (is.logical(res)) {
638
+          return(TRUE)
639
+      } else {
640
+          return(FALSE)
641
+      }
637 642
   }
643
+
644
+  
638 645
 }
639 646
 
640 647
 .findSuitableColumnsToPlot <- function(metadataTable, remove1LevelFactors = TRUE, verbose = TRUE) {
... ...
@@ -10,6 +10,7 @@
10 10
 #' @param directed \code{TRUE} or \code{FALSE}.  Default \code{FALSE}. Should the network be directed?
11 11
 #' @template forceRerun
12 12
 #' @export
13
+#' @seealso \code{\link{filterGRNAndConnectGenes}}
13 14
 #' @examples 
14 15
 #' # See the Workflow vignette on the GRaNIE website for examples
15 16
 #' GRN = loadExampleObject()
... ...
@@ -3340,7 +3340,7 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL
3340 3340
 
3341 3341
 #' Visualize a filtered eGRN in a flexible manner. 
3342 3342
 #' 
3343
-#' This function can visualize a filtered eGRN in a very flexible manner and requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}. 
3343
+#' This function can visualize a filtered eGRN in a very flexible manner and requires a \code{\linkS4class{GRN}} object as generated by \code{\link{build_eGRN_graph}}. 
3344 3344
 #'
3345 3345
 #' @template GRN 
3346 3346
 #' @template outputFolder
... ...
@@ -3360,7 +3360,7 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL
3360 3360
 #' @param vertexLabel_cex Numeric. Default \code{0.4}. Font size (multiplication factor, device-dependent)
3361 3361
 #' @param vertexLabel_dist Numeric. Default \code{0} vertex. Distance between the label and the vertex.
3362 3362
 #' @template forceRerun
3363
-#' @seealso \code{\link{filterGRNAndConnectGenes}}
3363
+#' @seealso \code{\link{build_eGRN_graph}}
3364 3364
 #' @examples
3365 3365
 #' GRN = loadExampleObject()
3366 3366
 #' GRN = visualizeGRN(GRN, maxRowsToPlot = 700, graph = "TF-gene", colorby = "type")
... ...
@@ -3368,7 +3368,8 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL
3368 3368
 #' @export
3369 3369
 visualizeGRN <- function(GRN, outputFolder = NULL,  basenameOutput = NULL, plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12,
3370 3370
                          title = NULL, maxRowsToPlot = 500, nCommunitiesMax = 8, graph = "TF-gene" , colorby = "type", layout = "fr",
3371
-                         vertice_color_TFs = list(h = 10, c = 85, l = c(25, 95)), vertice_color_peaks = list(h = 135, c = 45, l = c(35, 95)), vertice_color_genes = list(h = 260, c = 80, l = c(30, 90)),
3371
+                         vertice_color_TFs = list(h = 10, c = 85, l = c(25, 95)), vertice_color_peaks = list(h = 135, c = 45, l = c(35, 95)), 
3372
+                         vertice_color_genes = list(h = 260, c = 80, l = c(30, 90)),
3372 3373
                          vertexLabel_cex = 0.4, vertexLabel_dist = 0, forceRerun = FALSE
3373 3374
 ) {
3374 3375
     
... ...
@@ -3404,6 +3405,11 @@ visualizeGRN <- function(GRN, outputFolder = NULL,  basenameOutput = NULL, plotA
3404 3405
     
3405 3406
     outputFolder = .checkOutputFolder(GRN, outputFolder)
3406 3407
     
3408
+    if (is.null(GRN@graph$TF_gene) | is.null(GRN@graph$TF_peak_gene)) {
3409
+        message = paste0("Could not find information in the graph slot. Make sure you run the function build_eGRN_graph")
3410
+        .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
3411
+    }
3412
+    
3407 3413
     metadata_visualization.l = getBasic_metadata_visualization(GRN)
3408 3414
     # if (useDefaultMetadata) {
3409 3415
     #   metadata_visualization.l = getBasic_metadata_visualization(GRN)
... ...
@@ -3522,7 +3528,7 @@ visualizeGRN <- function(GRN, outputFolder = NULL,  basenameOutput = NULL, plotA
3522 3528
         colors_categories.l[["TF"]]  = c(color_gradient[1], color_gradient[nBins_real]) 
3523 3529
         colors_categories.l[["TF"]]  = color_gradient 
3524 3530
         symbols_categories.l[["TF"]] = c(15,NA,15)
3525
-        vertice_color_TFs   = append(list(metadata_visualization.l[["RNA_expression_TF"]],    "HOCOID",     "baseMean_log"), vertice_color_TFs)
3531
+        vertice_color_TFs   = append(list(metadata_visualization.l[["RNA_expression_TF"]],    "TF.HOCOID",     "baseMean_log"), vertice_color_TFs)
3526 3532
         #}
3527 3533
         
3528 3534
         if(graph == "TF-peak-gene"){
... ...
@@ -9,7 +9,7 @@ Genetic variants associated with diseases often affect non-coding regions, thus
9 9
 }
10 10
 \section{Package functions}{
11 11
 
12
-See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE/articles/} for all project-related information.
12
+See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE} for all project-related information.
13 13
 }
14 14
 
15 15
 \section{GRN object}{
... ...
@@ -37,3 +37,6 @@ This function requires a filtered set of connections in the \code{\linkS4class{G
37 37
 GRN = loadExampleObject()
38 38
 GRN = build_eGRN_graph(GRN, forceRerun = FALSE)
39 39
 }
40
+\seealso{
41
+\code{\link{filterGRNAndConnectGenes}}
42
+}
... ...
@@ -4,16 +4,27 @@
4 4
 \alias{getCounts}
5 5
 \title{Get counts for the various data defined in a \code{\linkS4class{GRN}} object}
6 6
 \usage{
7
-getCounts(GRN, type, norm, permuted = FALSE)
7
+getCounts(
8
+  GRN,
9
+  type,
10
+  norm = FALSE,
11
+  permuted = FALSE,
12
+  asMatrix = FALSE,
13
+  includeIDColumn = TRUE
14
+)
8 15
 }
9 16
 \arguments{
10 17
 \item{GRN}{Object of class \code{\linkS4class{GRN}}}
11 18
 
12 19
 \item{type}{Character. Either \code{peaks} or \code{rna}. \code{peaks} corresponds to the counts for the open chromatin data, while \code{rna} refers to th RNA-seq counts. If set to \code{rna}, both permuted and non-permuted data can be retrieved, while for \code{peaks}, only the non-permuted one (i.e., 0) can be retrieved.}
13 20
 
14
-\item{norm}{Logical. \code{TRUE} or \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?}
21
+\item{norm}{Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?}
15 22
 
16 23
 \item{permuted}{\code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should the permuted data be taken (\code{TRUE}) or the non-permuted, original one (\code{FALSE})?}
24
+
25
+\item{asMatrix}{Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. If set to \code{FALSE}, counts are returned as a data frame with or without an ID column (see \code{includeIDColumn}). If set to \code{TRUE}, counts are returned as a matrix with the ID column as row names.}
26
+
27
+\item{includeIDColumn}{Logical. \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Only relevant if \code{asMatrix = FALSE}. If set to \code{TRUE}, an explicit ID column is returned (no row names). If set to \code{FALSE}, the IDs are in the row names instead.}
17 28
 }
18 29
 \value{
19 30
 Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
... ...
@@ -38,9 +38,9 @@ plotCommunitiesEnrichment(
38 38
 
39 39
 \item{p}{Numeric. Default 0.05. p-value threshold to determine significance.}
40 40
 
41
-\item{nSignificant}{Numeric >0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.}
41
+\item{nSignificant}{Numeric > 0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.}
42 42
 
43
-\item{nID}{Numeric >0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.}
43
+\item{nID}{Numeric > 0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.}
44 44
 
45 45
 \item{maxWidth_nchar_plot}{Integer (>=10). Default 50. Maximum number of characters for a term before it is truncated.}
46 46
 
... ...
@@ -29,7 +29,7 @@ plotDiagnosticPlots_TFPeaks(
29 29
 
30 30
 \item{dataType}{Character vector. One of, or both of, \code{"real"} or \code{"permuted"}. For which data type, real or permuted data, to produce the diagnostic plots?}
31 31
 
32
-\item{nTFMax}{\code{NULL} or Integer. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10)}
32
+\item{nTFMax}{\code{NULL} or Integer > 0. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10)}
33 33
 
34 34
 \item{plotAsPDF}{\code{TRUE} or \code{FALSE}. Default \code{TRUE}.Should the plots be printed to a PDF file? If set to \code{TRUE}, a PDF file is generated, the name of which depends on the value of \code{basenameOutput}. If set to \code{FALSE}, all plots are printed to the currently active device. Note that most functions print more than one plot, which means you may only see the last plot depending on your active graphics device.}
35 35
 
... ...
@@ -37,9 +37,9 @@ plotTFEnrichment(
37 37
 
38 38
 \item{p}{Numeric. Default 0.05. p-value threshold to determine significance.}
39 39
 
40
-\item{nSignificant}{Numeric >0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.}
40
+\item{nSignificant}{Numeric > 0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.}
41 41
 
42
-\item{nID}{Numeric >0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.}
42
+\item{nID}{Numeric > 0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.}
43 43
 
44 44
 \item{display_pAdj}{\code{TRUE} or \code{FALSE}. Default \code{FALSE}. Is the p-value being displayed in the plots the adjusted p-value? This parameter is relevant for KEGG, Disease Ontology, and Reactome enrichments, and does not affect GO enrichments.}
45 45
 
... ...
@@ -66,12 +66,12 @@ visualizeGRN(
66 66
 The same \code{\linkS4class{GRN}} object, without modifications.
67 67
 }
68 68
 \description{
69
-This function can visualize a filtered eGRN in a very flexible manner and requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}.
69
+This function can visualize a filtered eGRN in a very flexible manner and requires a \code{\linkS4class{GRN}} object as generated by \code{\link{build_eGRN_graph}}.
70 70
 }
71 71
 \examples{
72 72
 GRN = loadExampleObject()
73 73
 GRN = visualizeGRN(GRN, maxRowsToPlot = 700, graph = "TF-gene", colorby = "type")
74 74
 }
75 75
 \seealso{
76
-\code{\link{filterGRNAndConnectGenes}}
76
+\code{\link{build_eGRN_graph}}
77 77
 }
... ...
@@ -269,7 +269,7 @@ For more parameter details, see the R help (`?filterData`).
269 269
 
270 270
 ```{r filterData, echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-200", results='hold'}
271 271
 # Chromosomes to keep for peaks. This should be a vector of chromosome names
272
-chrToKeep_peaks = c(paste0("chr", 1:22), "chrX", "chrY")
272
+chrToKeep_peaks = c(paste0("chr", 1:22), "chrX")
273 273
 GRN = filterData(GRN, minNormalizedMean_peaks = 5, minNormalizedMeanRNA = 1, chrToKeep_peaks = chrToKeep_peaks, maxSize_peaks = 10000, forceRerun = TRUE)
274 274
 ```
275 275
 We can see from the output that no peaks have been filtered due to their size and almost 11,000 have been filtered due to their small mean read counts, which collectively leaves around 64,000 peaks out of 75,000 originally. For the RNA data, almost half of the data has been filtered (16,211 out of around 35,000 genes).