Browse code

Bugfix for .makeObjectCompatible

Christian Arnold authored on 14/10/2022 12:45:16
Showing8 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: GRaNIE
2 2
 Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data
3
-Version: 1.1.20
3
+Version: 1.1.21
4 4
 Encoding: UTF-8
5 5
 Authors@R: c(person("Christian", "Arnold", email =
6 6
         "chrarnold@web.de", role = c("cre","aut")),
... ...
@@ -1,4 +1,4 @@
1
-# GRaNIE 1.1.14 to 1.1.20 (2022-12-13)
1
+# GRaNIE 1.1.14 to 1.1.21 (2022-12-13)
2 2
 
3 3
 ## Major changes
4 4
 - major object changes and optimizations, particularly related to storing the count matrices in an optimized and simpler format. In short, the count matrices are now stored either as normal or sparse matrices, depending on the amount of zeros present. In addition, only the counts after normalization are saved, the raw counts before applying normalization are not stored anymore. If no normalization is wished by the user, as before, the "normalized" counts are equal to the raw counts. `GRaNIE` is now more readily applicable for larger analyses and single-cell analysis even though we just started actively optimizing for it, so we cannot yet recommend applying our framework in a single-cell manner. Older GRN objects are automatically changed internally when executing the major functions upon the first invocation.
... ...
@@ -315,14 +315,16 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
315 315
     GRN@data$peaks$counts = .storeAsMatrixOrSparseMatrix(GRN, df = data.l[["peaks"]], ID_column = "peakID", slotName = "GRN@data$peaks$counts")
316 316
 
317 317
     # includeFiltered = TRUE here as it doesnt make a difference and because getCounts requires counts_metadata$isFiltered to be set already
318
-    GRN@data$peaks$counts_metadata = .createConsensusPeaksDF(getCounts(GRN, type = "peaks", permuted = FALSE, asMatrix = FALSE, includeFiltered = TRUE)) 
318
+    GRN@data$peaks$counts_metadata = .createConsensusPeaksDF(rownames(GRN@data$peaks$counts)) 
319 319
     stopifnot(c("chr", "start", "end", "peakID", "isFiltered") %in% colnames(GRN@data$peaks$counts_metadata))
320 320
     
321 321
     GRN@data$RNA$counts   = .storeAsMatrixOrSparseMatrix(GRN, df =  data.l[["RNA"]], ID_column = "ENSEMBL", slotName = "GRN@data$RNA$counts")
322 322
       
323 323
     GRN@data$RNA$counts_metadata = tibble::tibble(ID = data.l[["RNA"]]$ENSEMBL, isFiltered = FALSE)
324 324
     
325
-    GRN@data$RNA$counts_permuted_index = .shuffleColumns(countsRNA.norm.df %>% dplyr::select(-.data$ENSEMBL), .getMaxPermutation(GRN), returnUnshuffled = FALSE, returnAsList = FALSE)
325
+    GRN@data$RNA$counts_permuted_index = sample.int(ncol(GRN@data$RNA$counts), ncol(GRN@data$RNA$counts))
326
+    
327
+    
326 328
     
327 329
     futile.logger::flog.info(paste0( "Final dimensions of data:"))
328 330
     futile.logger::flog.info(paste0( " RNA  : ", nrow(countsRNA.norm.df)  , " x ", ncol(countsRNA.norm.df)   - 2, " (rows x columns)"))
... ...
@@ -402,11 +404,10 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor",
402 404
     
403 405
 }
404 406
 
405
-.createConsensusPeaksDF <- function(countsPeaks, idColumn = "peakID") {
407
+.createConsensusPeaksDF <- function(peakIDs) {
406 408
   
407
-  checkmate::assertChoice(idColumn, colnames(countsPeaks))
408
-  
409
-  ids.split = strsplit(countsPeaks %>% dplyr::pull(!!(idColumn)), split = "[:-]+")
409
+
410
+  ids.split = strsplit(peakIDs, split = "[:-]+")
410 411
   ids.split.length = sapply(ids.split, length)
411 412
   if (!all(ids.split.length == 3)) {
412 413
     message = paste0(" At least one of the IDs in the peaks data has an unsupported format. Make sure all peakIDs are in the format \"chr:start-end\"")
... ...
@@ -4182,8 +4183,8 @@ generateStatsSummary <- function(GRN,
4182 4183
 }
4183 4184
 
4184 4185
 
4186
+####### Retrieve GRN objects or parts of it #########
4185 4187
 
4186
-####### Misc functions #########
4187 4188
 
4188 4189
 #' Load example GRN dataset
4189 4190
 #' 
... ...
@@ -4199,58 +4200,58 @@ generateStatsSummary <- function(GRN,
4199 4200
 #' GRN = loadExampleObject()
4200 4201
 #' @return An small example \code{\linkS4class{GRN}} object
4201 4202
 loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl.de/download/zaugg/GRaNIE/GRN.rds") {
4202
-  
4203
-  checkmate::assertFlag(forceDownload)
4204
-  options(timeout=200)
4205 4203
     
4206
-  if (!is.installed("BiocFileCache")) {
4207
-      
4208
-      message = paste0("The package BiocFileCache is not installed, but recommended if you want to speed-up the retrieval of the GRN example object ",
4209
-      "via this function when using it multiple times. If not installed, the example object has to be downloaded anew every time you use this function.")
4210
-      .checkPackageInstallation("BiocFileCache", message, isWarning = TRUE)
4211
-      
4212
-      GRN = readRDS(url(fileURL))
4213
-
4214
-  } else {
4215
-      
4216
-      
4217
-      # Taken and modified from https://www.bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html
4218
-      
4219
-      bfc <- .get_cache()
4220
-      
4221
-      rid <- BiocFileCache::bfcquery(bfc, "GRaNIE_object_example")$rid
4222
-      if (!length(rid)) {
4223
-          rid <- names(BiocFileCache::bfcadd(bfc, "GRaNIE_object_example", fileURL))
4224
-      }
4225
-      if (!isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)) | forceDownload) {
4226
-          messageStr = paste0("Downloading GRaNIE example object from ", fileURL)
4227
-          message(messageStr)
4228
-          filePath = BiocFileCache::bfcdownload(bfc, rid, ask = FALSE)
4229
-      }
4230
-      
4231
-      
4232
-      filePath = BiocFileCache::bfcrpath(bfc, rids = rid)
4233
-      
4234
-      # Now we can read in the locally stored file
4235
-      GRN = readRDS(filePath)
4236
-  }
4237
-
4238
-  
4239
-  # Change the default path to the current working directory
4240
-  GRN@config$directories$outputRoot = "."
4241
-  GRN@config$directories$output_plots = "."
4242
-  GRN@config$directories$motifFolder = "."
4243
-  GRN@config$files$output_log = "GRN.log"
4244
-  
4245
-  GRN = .makeObjectCompatible(GRN)
4246
-  
4247
-  # TODO remove once done already in the object
4248
-  GRN = add_TF_gene_correlation(GRN)
4249
-  
4250
-  # GRN = add_featureVariation(GRN, formula = "auto", metadata = "mt_frac")
4251
-  
4252
-  GRN
4253
-  
4204
+    checkmate::assertFlag(forceDownload)
4205
+    options(timeout=200)
4206
+    
4207
+    if (!is.installed("BiocFileCache")) {
4208
+        
4209
+        message = paste0("The package BiocFileCache is not installed, but recommended if you want to speed-up the retrieval of the GRN example object ",
4210
+                         "via this function when using it multiple times. If not installed, the example object has to be downloaded anew every time you use this function.")
4211
+        .checkPackageInstallation("BiocFileCache", message, isWarning = TRUE)
4212
+        
4213
+        GRN = readRDS(url(fileURL))
4214
+        
4215
+    } else {
4216
+        
4217
+        
4218
+        # Taken and modified from https://www.bioconductor.org/packages/release/bioc/vignettes/BiocFileCache/inst/doc/BiocFileCache.html
4219
+        
4220
+        bfc <- .get_cache()
4221
+        
4222
+        rid <- BiocFileCache::bfcquery(bfc, "GRaNIE_object_example")$rid
4223
+        if (!length(rid)) {
4224
+            rid <- names(BiocFileCache::bfcadd(bfc, "GRaNIE_object_example", fileURL))
4225
+        }
4226
+        if (!isFALSE(BiocFileCache::bfcneedsupdate(bfc, rid)) | forceDownload) {
4227
+            messageStr = paste0("Downloading GRaNIE example object from ", fileURL)
4228
+            message(messageStr)
4229
+            filePath = BiocFileCache::bfcdownload(bfc, rid, ask = FALSE)
4230
+        }
4231
+        
4232
+        
4233
+        filePath = BiocFileCache::bfcrpath(bfc, rids = rid)
4234
+        
4235
+        # Now we can read in the locally stored file
4236
+        GRN = readRDS(filePath)
4237
+    }
4238
+    
4239
+    
4240
+    # Change the default path to the current working directory
4241
+    GRN@config$directories$outputRoot = "."
4242
+    GRN@config$directories$output_plots = "."
4243
+    GRN@config$directories$motifFolder = "."
4244
+    GRN@config$files$output_log = "GRN.log"
4245
+    
4246
+    GRN = .makeObjectCompatible(GRN)
4247
+    
4248
+    # TODO remove once done already in the object
4249
+    GRN = add_TF_gene_correlation(GRN)
4250
+    
4251
+    # GRN = add_featureVariation(GRN, formula = "auto", metadata = "mt_frac")
4252
+    
4253
+    GRN
4254
+    
4254 4255
 }
4255 4256
 
4256 4257
 
... ...
@@ -4273,90 +4274,90 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl
4273 4274
 #' counts.df = getCounts(GRN, type = "peaks", permuted = FALSE)
4274 4275
 #' @return Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4275 4276
 getCounts <- function(GRN, type,  permuted = FALSE, asMatrix = FALSE, includeIDColumn = TRUE, includeFiltered = FALSE) {
4276
-  
4277
-  checkmate::assertClass(GRN, "GRN")
4278
-  GRN = .addFunctionLogToObject(GRN)     
4279
-  
4280
-  GRN = .makeObjectCompatible(GRN)
4281
-  
4282
-  checkmate::assertChoice(type, c("peaks", "rna"))
4283
-  checkmate::assertFlag(permuted)
4284
-  
4285
-  permIndex = dplyr::if_else(permuted, "1", "0")
4286
-  
4287
-  if (type == "peaks") {
4288 4277
     
4289
-    if (permuted) {
4290
-      message = "Could not find permuted peak counts in GRN object. Peaks are not stored as permuted, set permuted = FALSE for type = \"peaks\""
4291
-      .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4292
-    }
4278
+    checkmate::assertClass(GRN, "GRN")
4279
+    GRN = .addFunctionLogToObject(GRN)     
4293 4280
     
4294
-    classPeaks = class(GRN@data$peaks$counts)
4295
-    if ("matrix" %in% classPeaks) {
4296
-      result = GRN@data$peaks$counts
4297
-    } else if ("dgCMatrix" %in% classPeaks) {
4298
-      result = .asMatrixFromSparse(GRN@data$peaks$counts, convertZero_to_NA = FALSE)
4299
-    } else {
4300
-      message = paste0("Unsupported class for GRN@data$peaks$counts. Contact the authors.")
4301
-      .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4302
-    }
4281
+    GRN = .makeObjectCompatible(GRN)
4303 4282
     
4283
+    checkmate::assertChoice(type, c("peaks", "rna"))
4284
+    checkmate::assertFlag(permuted)
4304 4285
     
4305
-  } else if (type == "rna") {
4286
+    permIndex = dplyr::if_else(permuted, "1", "0")
4306 4287
     
4307
-    classRNA = class(GRN@data$RNA$counts)
4308
-    if ("matrix" %in% classRNA) {
4309
-      result = GRN@data$RNA$counts
4310
-    } else if ("dgCMatrix" %in% classRNA) {
4311
-      result = .asMatrixFromSparse(GRN@data$RNA$counts, convertZero_to_NA = FALSE)
4312
-    } else {
4313
-      message = paste0("Unsupported class for GRN@data$RNA$counts. Contact the authors.")
4314
-      .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4288
+    if (type == "peaks") {
4289
+        
4290
+        if (permuted) {
4291
+            message = "Could not find permuted peak counts in GRN object. Peaks are not stored as permuted, set permuted = FALSE for type = \"peaks\""
4292
+            .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4293
+        }
4294
+        
4295
+        classPeaks = class(GRN@data$peaks$counts)
4296
+        if ("matrix" %in% classPeaks) {
4297
+            result = GRN@data$peaks$counts
4298
+        } else if ("dgCMatrix" %in% classPeaks) {
4299
+            result = .asMatrixFromSparse(GRN@data$peaks$counts, convertZero_to_NA = FALSE)
4300
+        } else {
4301
+            message = paste0("Unsupported class for GRN@data$peaks$counts. Contact the authors.")
4302
+            .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4303
+        }
4304
+        
4305
+        
4306
+    } else if (type == "rna") {
4307
+        
4308
+        classRNA = class(GRN@data$RNA$counts)
4309
+        if ("matrix" %in% classRNA) {
4310
+            result = GRN@data$RNA$counts
4311
+        } else if ("dgCMatrix" %in% classRNA) {
4312
+            result = .asMatrixFromSparse(GRN@data$RNA$counts, convertZero_to_NA = FALSE)
4313
+        } else {
4314
+            message = paste0("Unsupported class for GRN@data$RNA$counts. Contact the authors.")
4315
+            .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4316
+        }
4317
+        
4315 4318
     }
4316 4319
     
4317
-  }
4318
-  
4319
-  
4320
-  if (permuted) {
4321 4320
     
4322
-    if (type == "rna") {
4323
-      
4324
-      # Columns are shuffled so that non-matching samples are compared throughout the pipeline for all correlation-based analyses
4325
-      colnames(result) = colnames(result)[GRN@data$RNA$counts_permuted_index]
4326
-    }
4327
-  } 
4328
-  
4329
-  if (!includeFiltered) {
4321
+    if (permuted) {
4322
+        
4323
+        if (type == "rna") {
4324
+            
4325
+            # Columns are shuffled so that non-matching samples are compared throughout the pipeline for all correlation-based analyses
4326
+            colnames(result) = colnames(result)[GRN@data$RNA$counts_permuted_index]
4327
+        }
4328
+    } 
4330 4329
     
4331
-    if (type == "rna") {
4332
-      nonFiltered = which(! GRN@data$RNA$counts_metadata$isFiltered)
4333
-    } else {
4334
-      nonFiltered = which(! GRN@data$peaks$counts_metadata$isFiltered)
4330
+    if (!includeFiltered) {
4331
+        
4332
+        if (type == "rna") {
4333
+            nonFiltered = which(! GRN@data$RNA$counts_metadata$isFiltered)
4334
+        } else {
4335
+            nonFiltered = which(! GRN@data$peaks$counts_metadata$isFiltered)
4336
+        }
4337
+        
4338
+        result = result[nonFiltered,]
4335 4339
     }
4336 4340
     
4337
-    result = result[nonFiltered,]
4338
-  }
4339
-
4340
-  
4341
-  
4342
-  if (!asMatrix) {
4343
- 
4344
-    result.df =  result %>%
4345
-      as.data.frame()
4346 4341
     
4347
-    if (includeIDColumn)  {
4348
-      
4349
-      ID_column = dplyr::if_else(type == "rna", "ENSEMBL", "peakID")
4350
-      
4351
-      result.df = result.df %>%
4352
-        tibble::rownames_to_column(ID_column) 
4353
-    } 
4354 4342
     
4355
-    return(result.df)
4356
-
4357
-  }
4358
-  
4359
-  result
4343
+    if (!asMatrix) {
4344
+        
4345
+        result.df =  result %>%
4346
+            as.data.frame()
4347
+        
4348
+        if (includeIDColumn)  {
4349
+            
4350
+            ID_column = dplyr::if_else(type == "rna", "ENSEMBL", "peakID")
4351
+            
4352
+            result.df = result.df %>%
4353
+                tibble::rownames_to_column(ID_column) 
4354
+        } 
4355
+        
4356
+        return(result.df)
4357
+        
4358
+    }
4359
+    
4360
+    result
4360 4361
 }
4361 4362
 
4362 4363
 
... ...
@@ -4391,137 +4392,601 @@ getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE,
4391 4392
                               include_peakMetadata = FALSE,
4392 4393
                               include_geneMetadata = FALSE,
4393 4394
                               include_variancePartitionResults = FALSE) {
4394
-  
4395
-  checkmate::assertClass(GRN, "GRN")  
4396
-  GRN = .addFunctionLogToObject(GRN)
4397
-  
4398
-  GRN = .makeObjectCompatible(GRN)
4399
-   
4400
-  checkmate::assertChoice(type, c("TF_peaks", "peak_genes", "TF_genes", "all.filtered"))
4401
-  checkmate::assertFlag(permuted)
4402
-  #checkmate::assertIntegerish(permutation, lower = 0, upper = .getMaxPermutation(GRN))
4403
-  checkmate::assertFlag(include_TF_gene_correlations)
4404
-  checkmate::assertFlag(include_variancePartitionResults)
4405
-  checkmate::assertFlag(include_TFMetadata)
4406
-  checkmate::assertFlag(include_peakMetadata)
4407
-  checkmate::assertFlag(include_geneMetadata)
4408
-  
4409
-  permIndex = dplyr::if_else(permuted, "1", "0")
4410
-  
4411
-  if (type == "all.filtered") {
4412
-      
4413
-    merged.df = GRN@connections$all.filtered[[permIndex]]
4414
-
4415
-    if (include_TF_gene_correlations) {
4395
+    
4396
+    checkmate::assertClass(GRN, "GRN")  
4397
+    GRN = .addFunctionLogToObject(GRN)
4398
+    
4399
+    GRN = .makeObjectCompatible(GRN)
4400
+    
4401
+    checkmate::assertChoice(type, c("TF_peaks", "peak_genes", "TF_genes", "all.filtered"))
4402
+    checkmate::assertFlag(permuted)
4403
+    #checkmate::assertIntegerish(permutation, lower = 0, upper = .getMaxPermutation(GRN))
4404
+    checkmate::assertFlag(include_TF_gene_correlations)
4405
+    checkmate::assertFlag(include_variancePartitionResults)
4406
+    checkmate::assertFlag(include_TFMetadata)
4407
+    checkmate::assertFlag(include_peakMetadata)
4408
+    checkmate::assertFlag(include_geneMetadata)
4409
+    
4410
+    permIndex = dplyr::if_else(permuted, "1", "0")
4411
+    
4412
+    if (type == "all.filtered") {
4413
+        
4414
+        merged.df = GRN@connections$all.filtered[[permIndex]]
4415
+        
4416
+        if (include_TF_gene_correlations) {
4417
+            
4418
+            if (is.null(GRN@connections$TF_genes.filtered)) {
4419
+                message = "Please run the function add_TF_gene_correlation first. "
4420
+                .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4421
+            }
4422
+            
4423
+            # Merge with TF-gene table
4424
+            merged.df = merged.df %>%
4425
+                dplyr::left_join(GRN@connections$TF_genes.filtered[[permIndex]], 
4426
+                                 by = c("TF.name", "TF.ENSEMBL", "gene.ENSEMBL")) 
4427
+        }
4428
+        
4429
+        if (include_variancePartitionResults) {
4430
+            
4431
+            if (ncol(GRN@annotation$genes %>% dplyr::select(tidyselect::starts_with("variancePartition"))) == 0 |
4432
+                ncol(GRN@annotation$peaks %>% dplyr::select(tidyselect::starts_with("variancePartition"))) == 0) {
4433
+                message = "Please run the function add_featureVariation first. "
4434
+                .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4435
+            }
4436
+            
4437
+            merged.df = merged.df %>%
4438
+                dplyr::left_join(GRN@annotation$TFs %>% 
4439
+                                     dplyr::select(.data$TF.ENSEMBL, tidyselect::starts_with("TF.variancePartition")), 
4440
+                                 by = "TF.ENSEMBL")  %>%
4441
+                dplyr::left_join(GRN@annotation$genes %>% 
4442
+                                     dplyr::select(.data$gene.ENSEMBL, tidyselect::starts_with("gene.variancePartition")), 
4443
+                                 by = "gene.ENSEMBL")  %>%
4444
+                dplyr::left_join(GRN@annotation$peaks %>% 
4445
+                                     dplyr::select(.data$peak.ID, tidyselect::starts_with("peak.variancePartition")), 
4446
+                                 by = "peak.ID")
4447
+            
4448
+        }
4449
+        
4450
+        if (include_TFMetadata) {
4451
+            
4452
+            colsMissing = setdiff(colnames(GRN@annotation$TFs), colnames(merged.df))
4453
+            if (length(colsMissing) > 0) {
4454
+                merged.df = merged.df %>%
4455
+                    dplyr::left_join(GRN@annotation$TFs, by = c("TF.name", "TF.ENSEMBL"))
4456
+            }
4457
+        }
4458
+        
4459
+        if (include_peakMetadata) {
4460
+            
4461
+            colsMissing = setdiff(colnames(GRN@annotation$peaks), colnames(merged.df))
4462
+            if (length(colsMissing) > 0) {
4463
+                merged.df = merged.df %>%
4464
+                    dplyr::left_join(GRN@annotation$peaks, by = "peak.ID")
4465
+            }
4466
+        }
4467
+        
4468
+        if (include_geneMetadata) {
4469
+            
4470
+            colsMissing = setdiff(colnames(GRN@annotation$genes), colnames(merged.df))
4471
+            if (length(colsMissing) > 0) {
4472
+                merged.df = merged.df %>%
4473
+                    dplyr::left_join(GRN@annotation$genes, by = "gene.ENSEMBL")
4474
+            }
4475
+        }
4476
+        
4477
+        
4478
+        merged.df = merged.df %>%
4479
+            dplyr::select(tidyselect::starts_with("TF."), 
4480
+                          tidyselect::starts_with("peak."), 
4481
+                          tidyselect::starts_with("TF_peak."), 
4482
+                          tidyselect::starts_with("gene."), 
4483
+                          tidyselect::starts_with("peak_gene."), 
4484
+                          tidyselect::starts_with("TF_gene."), 
4485
+                          tidyselect::everything()) %>%
4486
+            tibble::as_tibble()
4487
+        
4488
+        return(merged.df)
4489
+        
4490
+    } else if (type == "TF_peaks") {
4491
+        
4492
+        return(tibble::as_tibble(GRN@connections$TF_peaks[[permIndex]]$main))
4493
+        
4494
+    } else if (type == "peak_genes") {
4495
+        
4496
+        return(tibble::as_tibble(GRN@connections$peak_genes[[permIndex]]))
4497
+        
4498
+    } else if (type == "TF_genes") {
4416 4499
         
4417 4500
         if (is.null(GRN@connections$TF_genes.filtered)) {
4418 4501
             message = "Please run the function add_TF_gene_correlation first. "
4419 4502
             .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4420 4503
         }
4421 4504
         
4422
-        # Merge with TF-gene table
4423
-        merged.df = merged.df %>%
4424
-            dplyr::left_join(GRN@connections$TF_genes.filtered[[permIndex]], 
4425
-                             by = c("TF.name", "TF.ENSEMBL", "gene.ENSEMBL")) 
4426
-    }
4505
+        return(tibble::as_tibble(GRN@connections$TF_genes.filtered[[permIndex]]))
4506
+        
4507
+    } 
4508
+    
4509
+}
4510
+
4511
+
4512
+#' Get counts for the various data defined in a \code{\linkS4class{GRN}} object
4513
+#' 
4514
+#' Get counts for the various data defined in a \code{\linkS4class{GRN}} object.
4515
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
4516
+#' 
4517
+#' @template GRN 
4518
+#' @param type Character. Either \code{peaks} or \code{rna}. \code{peaks} corresponds to the counts for the open chromatin data, while \code{rna} refers to th RNA-seq counts. If set to \code{rna}, both permuted and non-permuted data can be retrieved, while for \code{peaks}, only the non-permuted one (i.e., 0) can be retrieved.
4519
+#' @param asMatrix Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. If set to \code{FALSE}, counts are returned as a data frame with or without an ID column (see \code{includeIDColumn}). If set to \code{TRUE}, counts are returned as a matrix with the ID column as row names.
4520
+#' @param includeIDColumn Logical. \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Only relevant if \code{asMatrix = FALSE}. If set to \code{TRUE}, an explicit ID column is returned (no row names). If set to \code{FALSE}, the IDs are in the row names instead.
4521
+#' @param includeFiltered  Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. If set to \code{FALSE}, genes or peaks marked as filtered (after running the function \code{filterData}) will not be returned. If set to \code{TRUE}, all elements are returned regardless of the currently active filter status.
4522
+#' @template permuted
4523
+#' @export
4524
+#' @import tibble
4525
+#' @examples 
4526
+#' # See the Workflow vignette on the GRaNIE website for examples
4527
+#' GRN = loadExampleObject()
4528
+#' counts.df = getCounts(GRN, type = "peaks", permuted = FALSE)
4529
+#' @return Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4530
+getCounts <- function(GRN, type,  permuted = FALSE, asMatrix = FALSE, includeIDColumn = TRUE, includeFiltered = FALSE) {
4531
+    
4532
+    checkmate::assertClass(GRN, "GRN")
4533
+    GRN = .addFunctionLogToObject(GRN)     
4534
+    
4535
+    GRN = .makeObjectCompatible(GRN)
4536
+    
4537
+    checkmate::assertChoice(type, c("peaks", "rna"))
4538
+    checkmate::assertFlag(permuted)
4427 4539
     
4428
-    if (include_variancePartitionResults) {
4540
+    permIndex = dplyr::if_else(permuted, "1", "0")
4541
+    
4542
+    if (type == "peaks") {
4429 4543
         
4430
-        if (ncol(GRN@annotation$genes %>% dplyr::select(tidyselect::starts_with("variancePartition"))) == 0 |
4431
-            ncol(GRN@annotation$peaks %>% dplyr::select(tidyselect::starts_with("variancePartition"))) == 0) {
4432
-            message = "Please run the function add_featureVariation first. "
4544
+        if (permuted) {
4545
+            message = "Could not find permuted peak counts in GRN object. Peaks are not stored as permuted, set permuted = FALSE for type = \"peaks\""
4433 4546
             .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4434 4547
         }
4435 4548
         
4436
-        merged.df = merged.df %>%
4437
-            dplyr::left_join(GRN@annotation$TFs %>% 
4438
-                                 dplyr::select(.data$TF.ENSEMBL, tidyselect::starts_with("TF.variancePartition")), 
4439
-                             by = "TF.ENSEMBL")  %>%
4440
-            dplyr::left_join(GRN@annotation$genes %>% 
4441
-                                 dplyr::select(.data$gene.ENSEMBL, tidyselect::starts_with("gene.variancePartition")), 
4442
-                             by = "gene.ENSEMBL")  %>%
4443
-            dplyr::left_join(GRN@annotation$peaks %>% 
4444
-                                 dplyr::select(.data$peak.ID, tidyselect::starts_with("peak.variancePartition")), 
4445
-                             by = "peak.ID")
4549
+        classPeaks = class(GRN@data$peaks$counts)
4550
+        if ("matrix" %in% classPeaks) {
4551
+            result = GRN@data$peaks$counts
4552
+        } else if ("dgCMatrix" %in% classPeaks) {
4553
+            result = .asMatrixFromSparse(GRN@data$peaks$counts, convertZero_to_NA = FALSE)
4554
+        } else {
4555
+            message = paste0("Unsupported class for GRN@data$peaks$counts. Contact the authors.")
4556
+            .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4557
+        }
4446 4558
         
4447
-    }
4448
-    
4449
-    if (include_TFMetadata) {
4450 4559
         
4451
-        colsMissing = setdiff(colnames(GRN@annotation$TFs), colnames(merged.df))
4452
-        if (length(colsMissing) > 0) {
4453
-            merged.df = merged.df %>%
4454
-                dplyr::left_join(GRN@annotation$TFs, by = c("TF.name", "TF.ENSEMBL"))
4560
+    } else if (type == "rna") {
4561
+        
4562
+        classRNA = class(GRN@data$RNA$counts)
4563
+        if ("matrix" %in% classRNA) {
4564
+            result = GRN@data$RNA$counts
4565
+        } else if ("dgCMatrix" %in% classRNA) {
4566
+            result = .asMatrixFromSparse(GRN@data$RNA$counts, convertZero_to_NA = FALSE)
4567
+        } else {
4568
+            message = paste0("Unsupported class for GRN@data$RNA$counts. Contact the authors.")
4569
+            .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4455 4570
         }
4571
+        
4456 4572
     }
4457 4573
     
4458
-    if (include_peakMetadata) {
4574
+    
4575
+    if (permuted) {
4459 4576
         
4460
-        colsMissing = setdiff(colnames(GRN@annotation$peaks), colnames(merged.df))
4461
-        if (length(colsMissing) > 0) {
4462
-            merged.df = merged.df %>%
4463
-                dplyr::left_join(GRN@annotation$peaks, by = "peak.ID")
4577
+        if (type == "rna") {
4578
+            
4579
+            # Columns are shuffled so that non-matching samples are compared throughout the pipeline for all correlation-based analyses
4580
+            colnames(result) = colnames(result)[GRN@data$RNA$counts_permuted_index]
4464 4581
         }
4465
-    }
4582
+    } 
4466 4583
     
4467
-    if (include_geneMetadata) {
4584
+    if (!includeFiltered) {
4468 4585
         
4469
-        colsMissing = setdiff(colnames(GRN@annotation$genes), colnames(merged.df))
4470
-        if (length(colsMissing) > 0) {
4471
-            merged.df = merged.df %>%
4472
-                dplyr::left_join(GRN@annotation$genes, by = "gene.ENSEMBL")
4586
+        if (type == "rna") {
4587
+            nonFiltered = which(! GRN@data$RNA$counts_metadata$isFiltered)
4588
+        } else {
4589
+            nonFiltered = which(! GRN@data$peaks$counts_metadata$isFiltered)
4473 4590
         }
4591
+        
4592
+        result = result[nonFiltered,]
4474 4593
     }
4475
-
4476 4594
     
4477
-    merged.df = merged.df %>%
4478
-        dplyr::select(tidyselect::starts_with("TF."), 
4479
-                      tidyselect::starts_with("peak."), 
4480
-                      tidyselect::starts_with("TF_peak."), 
4481
-                      tidyselect::starts_with("gene."), 
4482
-                      tidyselect::starts_with("peak_gene."), 
4483
-                      tidyselect::starts_with("TF_gene."), 
4484
-                      tidyselect::everything()) %>%
4485
-        tibble::as_tibble()
4486
-
4487
-    return(merged.df)
4488
-    
4489
-  } else if (type == "TF_peaks") {
4490
-    
4491
-    return(tibble::as_tibble(GRN@connections$TF_peaks[[permIndex]]$main))
4492
-    
4493
-  } else if (type == "peak_genes") {
4494 4595
     
4495
-    return(tibble::as_tibble(GRN@connections$peak_genes[[permIndex]]))
4496 4596
     
4497
-  } else if (type == "TF_genes") {
4498
-      
4499
-    if (is.null(GRN@connections$TF_genes.filtered)) {
4500
-      message = "Please run the function add_TF_gene_correlation first. "
4501
-      .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4597
+    if (!asMatrix) {
4598
+        
4599
+        result.df =  result %>%
4600
+            as.data.frame()
4601
+        
4602
+        if (includeIDColumn)  {
4603
+            
4604
+            ID_column = dplyr::if_else(type == "rna", "ENSEMBL", "peakID")
4605
+            
4606
+            result.df = result.df %>%
4607
+                tibble::rownames_to_column(ID_column) 
4608
+        } 
4609
+        
4610
+        return(result.df)
4611
+        
4502 4612
     }
4503
-      
4504
-    return(tibble::as_tibble(GRN@connections$TF_genes.filtered[[permIndex]]))
4505
-      
4506
-  } 
4507
-  
4508
-}
4509
-
4510
-.getAll_TF_peak_connectionTypes <- function (GRN) {
4511
-  
4512
-  TF_peak.connectionTypesAll = unique(as.character(GRN@connections$TF_peaks[["0"]]$main$TF_peak.connectionType))
4513
-  if (length(TF_peak.connectionTypesAll) > 1) {
4514
-    # Dont use all, always separate between the different connection types
4515
-    # TF_peak.connectionTypesAll = c(TF_peak.connectionTypesAll, "all")
4516
-  }
4517
-  TF_peak.connectionTypesAll 
4613
+    
4614
+    result
4518 4615
 }
4519 4616
 
4520 4617
 
4521
-.checkOutputFolder <- function (GRN, outputFolder) {
4522
-  
4523
-  if (!is.null(outputFolder)) {
4524
-    
4618
+#' Extract connections or links from a \code{\linkS4class{GRN}} object as da data frame.
4619
+#' 
4620
+#' Returns stored connections/links (either TF-peak, peak-genes or the filtered set of connections as produced by \code{\link{filterGRNAndConnectGenes}}). 
4621
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
4622
+#' 
4623
+#' @export
4624
+#' @template GRN 
4625
+#' @template permuted
4626
+#' @param type Character. One of \code{TF_peaks}, \code{peak_genes}, \code{TF_genes} or \code{all.filtered}. Default \code{all.filtered}. The type of connections to retrieve.
4627
+#' @param include_TF_gene_correlations Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should TFs and gene correlations be returned as well? If set to \code{TRUE}, they must have been computed beforehand with \code{\link{add_TF_gene_correlation}}. Only relevant for type = "all.filtered"
4628
+#' @param include_TFMetadata Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should TF metadata be returned as well? Only relevant for type = "all.filtered"
4629
+#' @param include_peakMetadata Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should peak metadata be returned as well?  Only relevant for type = "all.filtered"
4630
+#' @param include_geneMetadata Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should gene metadata be returned as well?  Only relevant for type = "all.filtered"
4631
+#' @param include_variancePartitionResults Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. 
4632
+#' Should the results from the function \code{\link{add_featureVariation}} be included? 
4633
+#' If set to \code{TRUE}, they must have been computed beforehand with \code{\link{add_featureVariation}}; otherwise, an error is thrown.
4634
+#' Only relevant for type = "all.filtered"
4635
+#' @return A data frame with the requested connections. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4636
+#' @seealso \code{\link{filterGRNAndConnectGenes}}
4637
+#' @seealso \code{\link{add_featureVariation}}
4638
+#' @seealso \code{\link{add_TF_gene_correlation}}
4639
+#' @examples 
4640
+#' # See the Workflow vignette on the GRaNIE website for examples
4641
+#' GRN = loadExampleObject()
4642
+#' GRN_con.all.df = getGRNConnections(GRN)
4643
+getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE, 
4644
+                              include_TF_gene_correlations = FALSE, 
4645
+                              include_TFMetadata = FALSE,
4646
+                              include_peakMetadata = FALSE,
4647
+                              include_geneMetadata = FALSE,
4648
+                              include_variancePartitionResults = FALSE) {
4649
+    
4650
+    checkmate::assertClass(GRN, "GRN")  
4651
+    GRN = .addFunctionLogToObject(GRN)
4652
+    
4653
+    GRN = .makeObjectCompatible(GRN)
4654
+    
4655
+    checkmate::assertChoice(type, c("TF_peaks", "peak_genes", "TF_genes", "all.filtered"))
4656
+    checkmate::assertFlag(permuted)
4657
+    #checkmate::assertIntegerish(permutation, lower = 0, upper = .getMaxPermutation(GRN))
4658
+    checkmate::assertFlag(include_TF_gene_correlations)
4659
+    checkmate::assertFlag(include_variancePartitionResults)
4660
+    checkmate::assertFlag(include_TFMetadata)
4661
+    checkmate::assertFlag(include_peakMetadata)
4662
+    checkmate::assertFlag(include_geneMetadata)
4663
+    
4664
+    permIndex = dplyr::if_else(permuted, "1", "0")
4665
+    
4666
+    if (type == "all.filtered") {
4667
+        
4668
+        merged.df = GRN@connections$all.filtered[[permIndex]]
4669
+        
4670
+        if (include_TF_gene_correlations) {
4671
+            
4672
+            if (is.null(GRN@connections$TF_genes.filtered)) {
4673
+                message = "Please run the function add_TF_gene_correlation first. "
4674
+                .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4675
+            }
4676
+            
4677
+            # Merge with TF-gene table
4678
+            merged.df = merged.df %>%
4679
+                dplyr::left_join(GRN@connections$TF_genes.filtered[[permIndex]], 
4680
+                                 by = c("TF.name", "TF.ENSEMBL", "gene.ENSEMBL")) 
4681
+        }
4682
+        
4683
+        if (include_variancePartitionResults) {
4684
+            
4685
+            if (ncol(GRN@annotation$genes %>% dplyr::select(tidyselect::starts_with("variancePartition"))) == 0 |
4686
+                ncol(GRN@annotation$peaks %>% dplyr::select(tidyselect::starts_with("variancePartition"))) == 0) {
4687
+                message = "Please run the function add_featureVariation first. "
4688
+                .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4689
+            }
4690
+            
4691
+            merged.df = merged.df %>%
4692
+                dplyr::left_join(GRN@annotation$TFs %>% 
4693
+                                     dplyr::select(.data$TF.ENSEMBL, tidyselect::starts_with("TF.variancePartition")), 
4694
+                                 by = "TF.ENSEMBL")  %>%
4695
+                dplyr::left_join(GRN@annotation$genes %>% 
4696
+                                     dplyr::select(.data$gene.ENSEMBL, tidyselect::starts_with("gene.variancePartition")), 
4697
+                                 by = "gene.ENSEMBL")  %>%
4698
+                dplyr::left_join(GRN@annotation$peaks %>% 
4699
+                                     dplyr::select(.data$peak.ID, tidyselect::starts_with("peak.variancePartition")), 
4700
+                                 by = "peak.ID")
4701
+            
4702
+        }
4703
+        
4704
+        if (include_TFMetadata) {
4705
+            
4706
+            colsMissing = setdiff(colnames(GRN@annotation$TFs), colnames(merged.df))
4707
+            if (length(colsMissing) > 0) {
4708
+                merged.df = merged.df %>%
4709
+                    dplyr::left_join(GRN@annotation$TFs, by = c("TF.name", "TF.ENSEMBL"))
4710
+            }
4711
+        }
4712
+        
4713
+        if (include_peakMetadata) {
4714
+            
4715
+            colsMissing = setdiff(colnames(GRN@annotation$peaks), colnames(merged.df))
4716
+            if (length(colsMissing) > 0) {
4717
+                merged.df = merged.df %>%
4718
+                    dplyr::left_join(GRN@annotation$peaks, by = "peak.ID")
4719
+            }
4720
+        }
4721
+        
4722
+        if (include_geneMetadata) {
4723
+            
4724
+            colsMissing = setdiff(colnames(GRN@annotation$genes), colnames(merged.df))
4725
+            if (length(colsMissing) > 0) {
4726
+                merged.df = merged.df %>%
4727
+                    dplyr::left_join(GRN@annotation$genes, by = "gene.ENSEMBL")
4728
+            }
4729
+        }
4730
+        
4731
+        
4732
+        merged.df = merged.df %>%
4733
+            dplyr::select(tidyselect::starts_with("TF."), 
4734
+                          tidyselect::starts_with("peak."), 
4735
+                          tidyselect::starts_with("TF_peak."), 
4736
+                          tidyselect::starts_with("gene."), 
4737
+                          tidyselect::starts_with("peak_gene."), 
4738
+                          tidyselect::starts_with("TF_gene."), 
4739
+                          tidyselect::everything()) %>%
4740
+            tibble::as_tibble()
4741
+        
4742
+        return(merged.df)
4743
+        
4744
+    } else if (type == "TF_peaks") {
4745
+        
4746
+        return(tibble::as_tibble(GRN@connections$TF_peaks[[permIndex]]$main))
4747
+        
4748
+    } else if (type == "peak_genes") {
4749
+        
4750
+        return(tibble::as_tibble(GRN@connections$peak_genes[[permIndex]]))
4751
+        
4752
+    } else if (type == "TF_genes") {
4753
+        
4754
+        if (is.null(GRN@connections$TF_genes.filtered)) {
4755
+            message = "Please run the function add_TF_gene_correlation first. "
4756
+            .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4757
+        }
4758
+        
4759
+        return(tibble::as_tibble(GRN@connections$TF_genes.filtered[[permIndex]]))
4760
+        
4761
+    } 
4762
+    
4763
+}
4764
+
4765
+
4766
+
4767
+#' Retrieve parameters for previously used function calls and general parameters for a \code{\linkS4class{GRN}} object. 
4768
+#' 
4769
+#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
4770
+#' 
4771
+#' @export
4772
+#' @template GRN 
4773
+#' @param name Character. Default \code{all}. Name of parameter or function name to retrieve. Set to the special keyword \code{all} to retrieve all parameters.
4774
+#' @param type Character. Either \code{function} or \code{parameter}. Default \code{parameter}. When set to \code{function}, a valid \code{GRaNIE} function name must be given that has been run before. When set to \code{parameter}, in combination with \code{name}, returns a specific parameter (as specified in \code{GRN@config})).
4775
+#' @return The requested parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4776
+#' @examples 
4777
+#' # See the Workflow vignette on the GRaNIE website for examples
4778
+#' GRN = loadExampleObject()
4779
+#' params.l = getParameters(GRN, type = "parameter", name = "all")
4780
+getParameters <- function (GRN, type = "parameter", name = "all") {
4781
+    
4782
+    checkmate::assertClass(GRN, "GRN")
4783
+    GRN = .addFunctionLogToObject(GRN)
4784
+    
4785
+    GRN = .makeObjectCompatible(GRN)
4786
+    
4787
+    checkmate::assertChoice(type, c("function", "parameter"))
4788
+    
4789
+    if (type == "function") {
4790
+        
4791
+        checkmate::assertCharacter(name, any.missing = FALSE, len = 1)
4792
+        functionParameters = GRN@config$functionParameters[[name]]
4793
+        if (is.null(functionParameters)) {
4794
+            checkmate::assertChoice(name, ls(paste0("package:", utils::packageName())))
4795
+        } 
4796
+        
4797
+        return(functionParameters)
4798
+        
4799
+    } else {
4800
+        
4801
+        if (name == "all") {
4802
+            return(GRN@config)
4803
+        } else {
4804
+            parameters = GRN@config[[name]]
4805
+            if (is.null(parameters)) {
4806
+                checkmate::assertChoice(name, names(GRN@config$parameters))
4807
+            } 
4808
+            
4809
+            return(parameters)
4810
+        }
4811
+        
4812
+    }
4813
+    
4814
+}
4815
+
4816
+
4817
+
4818
+#' Optional convenience function to delete intermediate data from the function \code{\link{AR_classification_wrapper}} and summary statistics that may occupy a lot of space
4819
+#' @export
4820
+#' @template GRN
4821
+#' @return An updated \code{\linkS4class{GRN}} object, with some slots being deleted (\code{GRN@data$TFs$classification} as well as \code{GRN@stats$connectionDetails.l})
4822
+#' @examples 
4823
+#' # See the Workflow vignette on the GRaNIE website for examples
4824
+#' GRN = loadExampleObject()
4825
+#' GRN = deleteIntermediateData(GRN)
4826
+deleteIntermediateData <- function(GRN) {
4827
+    
4828
+    checkmate::assertClass(GRN, "GRN")
4829
+    GRN = .addFunctionLogToObject(GRN)
4830
+    
4831
+    GRN = .makeObjectCompatible(GRN)
4832
+    
4833
+    for (permutationCur in 0:.getMaxPermutation(GRN)) {
4834
+        
4835
+        permIndex = as.character(permutationCur)
4836
+        GRN@data$TFs$classification[[permIndex]]$TF_cor_median_foreground = NULL
4837
+        GRN@data$TFs$classification[[permIndex]]$TF_cor_median_background = NULL
4838
+        GRN@data$TFs$classification[[permIndex]]$TF_peak_cor_foreground = NULL
4839
+        GRN@data$TFs$classification[[permIndex]]$TF_peak_cor_background = NULL
4840
+        GRN@data$TFs$classification[[permIndex]]$act.rep.thres.l = NULL
4841
+    }
4842
+    
4843
+    GRN@stats$connectionDetails.l = NULL
4844
+    
4845
+    GRN
4846
+    
4847
+    
4848
+}
4849
+
4850
+#' Change the output directory of a GRN object
4851
+#' 
4852
+#' @export
4853
+#' @template GRN
4854
+#' @param outputDirectory Character. Default \code{.}. New output directory for all output files unless overwritten via the parameter \code{outputFolder}.
4855
+#' @return An updated \code{\linkS4class{GRN}} object, with the output directory being adjusted accordingly
4856
+#' @examples 
4857
+#' GRN = loadExampleObject()
4858
+#' GRN = changeOutputDirectory(GRN, outputDirectory = ".")
4859
+changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4860
+    
4861
+    start = Sys.time()  
4862
+    
4863
+    checkmate::assertClass(GRN, "GRN")
4864
+    GRN = .addFunctionLogToObject(GRN)
4865
+    
4866
+    GRN = .makeObjectCompatible(GRN)
4867
+    
4868
+    checkmate::assertCharacter(outputDirectory, len = 1, min.chars = 1)
4869
+    
4870
+    GRN@config$directories$outputRoot   = outputDirectory
4871
+    GRN@config$directories$output_plots = paste0(outputDirectory, .Platform$file.sep, "plots", .Platform$file.sep)
4872
+    GRN@config$files$output_log         = paste0(outputDirectory, .Platform$file.sep, "GRN.log")
4873
+    
4874
+    futile.logger::flog.info(paste0("Output directory changed in the object to " , outputDirectory))
4875
+    
4876
+    
4877
+    GRN
4878
+    
4879
+}
4880
+
4881
+####### Internal functions #########
4882
+
4883
+
4884
+
4885
+# Converts from an older GRN object format to the most current one due to internal optimizations
4886
+.makeObjectCompatible <- function(GRN) {
4887
+    
4888
+    if (is.null(GRN@annotation$TFs) & !is.null(GRN@data$TFs$translationTable)) {
4889
+        GRN@annotation$TFs = GRN@data$TFs$translationTable
4890
+        GRN@data$TFs$translationTable = NULL
4891
+    }
4892
+    
4893
+    if (!is.null(GRN@annotation$TFs)) {
4894
+        
4895
+        if (!"TF.ENSEMBL" %in% colnames(GRN@annotation$TFs)) {
4896
+            GRN@annotation$TFs = dplyr::rename(GRN@annotation$TFs, TF.ENSEMBL = .data$ENSEMBL)
4897
+        }
4898
+        if (!"TF.HOCOID" %in% colnames(GRN@annotation$TFs)) {
4899
+            GRN@annotation$TFs = dplyr::rename(GRN@annotation$TFs, TF.HOCOID = .data$HOCOID)
4900
+        }
4901
+        
4902
+        if ("ENSEMBL" %in% colnames(GRN@annotation$TFs)) {
4903
+            GRN@annotation$TFs = dplyr::select(GRN@annotation$TFs, -.data$ENSEMBL)
4904
+        }
4905
+    }
4906
+    
4907
+    
4908
+    # Temporary fix: Replace lincRNA with lncRNA due to a recent change in biomaRt until we update the object directly
4909
+    if ("lncRNA" %in% levels(GRN@annotation$genes$gene.type)) {
4910
+        GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(.data$gene.type, lncRNA = "lincRNA")) 
4911
+    }
4912
+    
4913
+    if (is.null(GRN@annotation$peaks) & !is.null(GRN@annotation$consensusPeaks)) {
4914
+        GRN@annotation[["peaks"]] = GRN@annotation[["consensusPeaks"]]
4915
+        GRN@annotation[["consensusPeaks"]] = NULL
4916
+        GRN@annotation[["peaks_obj"]] = GRN@annotation[["consensusPeaks_obj"]]
4917
+        GRN@annotation[["consensusPeaks_obj"]] = NULL
4918
+        
4919
+    }
4920
+    
4921
+    # Renamed count slots and their structure
4922
+    # 1. peaks
4923
+    if (!is.null(GRN@data$peaks[["counts_orig"]])) {
4924
+        GRN@data$peaks[["counts_orig"]] = NULL
4925
+    }
4926
+    if (is.null(GRN@data$peaks[["counts"]])) {
4927
+        GRN@data$peaks[["counts"]] = .storeAsMatrixOrSparseMatrix(GRN, df = GRN@data$peaks$counts_norm %>% dplyr::select(-.data$isFiltered), 
4928
+                                                                  ID_column = "peakID", slotName = "GRN@data$peaks$counts")
4929
+    }
4930
+    if (!is.null(GRN@data$peaks[["counts_norm"]])) {
4931
+        GRN@data$peaks[["counts_norm"]] = NULL
4932
+    }
4933
+    
4934
+    if (is.null(GRN@data$peaks[["counts_metadata"]])) {
4935
+        GRN@data$peaks[["counts_metadata"]] = .createConsensusPeaksDF(rownames(GRN@data$peaks[["counts"]])) 
4936
+        stopifnot(c("chr", "start", "end", "peakID", "isFiltered") %in% colnames(GRN@data$peaks$counts_metadata))
4937
+    }
4938
+    if (!is.null(GRN@data$peaks[["consensusPeaks"]])) {
4939
+        GRN@data$peaks[["consensusPeaks"]] = NULL
4940
+    }
4941
+    
4942
+    # 2. RNA
4943
+    if (!is.null(GRN@data$RNA[["counts_orig"]])) {
4944
+        GRN@data$RNA[["counts_orig"]] = NULL
4945
+    }
4946
+    if (is.null(GRN@data$RNA[["counts"]]) & !is.null(GRN@data$RNA$counts_norm.l[["0"]])) {
4947
+        GRN@data$RNA[["counts"]] = .storeAsMatrixOrSparseMatrix(GRN, df = GRN@data$RNA$counts_norm.l[["0"]] %>% dplyr::select(-.data$isFiltered), 
4948
+                                                                ID_column = "ENSEMBL", slotName = "GRN@data$RNA$counts")
4949
+    }
4950
+    if (!is.null(GRN@data$RNA[["counts_norm.l"]])) {
4951
+        GRN@data$RNA[["counts_norm.l"]] = NULL
4952
+    }
4953
+    if (is.null(GRN@data$RNA[["counts_metadata"]]) & !is.null(GRN@data$RNA$counts)) {
4954
+        GRN@data$RNA[["counts_metadata"]] = tibble::tibble(ID = rownames(GRN@data$RNA$counts), isFiltered = FALSE)
4955
+    }
4956
+    
4957
+    if (is.null(GRN@data$RNA[["counts_permuted_index"]]) & !is.null(GRN@data$RNA$counts)) {
4958
+        GRN@data$RNA[["counts_permuted_index"]] = sample.int(ncol(GRN@data$RNA$counts), ncol(GRN@data$RNA$counts))
4959
+    }
4960
+    
4961
+    
4962
+    GRN
4963
+}
4964
+
4965
+
4966
+.checkExistanceFilteredConnections <- function(GRN) {
4967
+    
4968
+    if (is.null(GRN@connections$all.filtered[["0"]])) {
4969
+        message = "Could not find filtered connections (the slot GRN@connections$all.filtered[[\"0\"]] is NULL). Run the function filterGRNAndConnectGenes."
4970
+        .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
4971
+    }
4972
+}
4973
+
4974
+
4975
+.getAll_TF_peak_connectionTypes <- function (GRN) {
4976
+  
4977
+  TF_peak.connectionTypesAll = unique(as.character(GRN@connections$TF_peaks[["0"]]$main$TF_peak.connectionType))
4978
+  if (length(TF_peak.connectionTypesAll) > 1) {
4979
+    # Dont use all, always separate between the different connection types
4980
+    # TF_peak.connectionTypesAll = c(TF_peak.connectionTypesAll, "all")
4981
+  }
4982
+  TF_peak.connectionTypesAll 
4983
+}
4984
+
4985
+
4986
+.checkOutputFolder <- function (GRN, outputFolder) {
4987
+  
4988
+  if (!is.null(outputFolder)) {
4989
+    
4525 4990
     checkmate::assertDirectory(outputFolder, access = "w")
4526 4991
     
4527 4992
     if (!endsWith(outputFolder, .Platform$file.sep)) {
... ...
@@ -4642,56 +5107,6 @@ getGRNConnections <- function(GRN, type = "all.filtered",  permuted = FALSE,
4642 5107
   GRN
4643 5108
 }
4644 5109
 
4645
-
4646
-#' Retrieve parameters for previously used function calls and general parameters for a \code{\linkS4class{GRN}} object. 
4647
-#' 
4648
-#' \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
4649
-#' 
4650
-#' @export
4651
-#' @template GRN 
4652
-#' @param name Character. Default \code{all}. Name of parameter or function name to retrieve. Set to the special keyword \code{all} to retrieve all parameters.
4653
-#' @param type Character. Either \code{function} or \code{parameter}. Default \code{parameter}. When set to \code{function}, a valid \code{GRaNIE} function name must be given that has been run before. When set to \code{parameter}, in combination with \code{name}, returns a specific parameter (as specified in \code{GRN@config})).
4654
-#' @return The requested parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
4655
-#' @examples 
4656
-#' # See the Workflow vignette on the GRaNIE website for examples
4657
-#' GRN = loadExampleObject()
4658
-#' params.l = getParameters(GRN, type = "parameter", name = "all")
4659
-getParameters <- function (GRN, type = "parameter", name = "all") {
4660
-  
4661
-  checkmate::assertClass(GRN, "GRN")
4662
-  GRN = .addFunctionLogToObject(GRN)
4663
-    
4664
-  GRN = .makeObjectCompatible(GRN)
4665
-    
4666
-  checkmate::assertChoice(type, c("function", "parameter"))
4667
-  
4668
-  if (type == "function") {
4669
-    
4670
-    checkmate::assertCharacter(name, any.missing = FALSE, len = 1)
4671
-    functionParameters = GRN@config$functionParameters[[name]]
4672
-    if (is.null(functionParameters)) {
4673
-      checkmate::assertChoice(name, ls(paste0("package:", utils::packageName())))
4674
-    } 
4675
-    
4676
-    return(functionParameters)
4677
-    
4678
-  } else {
4679
-    
4680
-    if (name == "all") {
4681
-      return(GRN@config)
4682
-    } else {
4683
-      parameters = GRN@config[[name]]
4684
-      if (is.null(parameters)) {
4685
-        checkmate::assertChoice(name, names(GRN@config$parameters))
4686
-      } 
4687
-      
4688
-      return(parameters)
4689
-    }
4690
-    
4691
-  }
4692
-  
4693
-}
4694
-
4695 5110
 .getMaxPermutation <- function (GRN) {
4696 5111
   
4697 5112
   if (!is.null(GRN@config$parameters$internal$nPermutations)) {
... ...
@@ -4704,37 +5119,6 @@ getParameters <- function (GRN, type = "parameter", name = "all") {
4704 5119
 }
4705 5120
 
4706 5121
 
4707
-#' Optional convenience function to delete intermediate data from the function \code{\link{AR_classification_wrapper}} and summary statistics that may occupy a lot of space
4708
-#' @export
4709
-#' @template GRN
4710
-#' @return An updated \code{\linkS4class{GRN}} object, with some slots being deleted (\code{GRN@data$TFs$classification} as well as \code{GRN@stats$connectionDetails.l})
4711
-#' @examples 
4712
-#' # See the Workflow vignette on the GRaNIE website for examples
4713
-#' GRN = loadExampleObject()
4714
-#' GRN = deleteIntermediateData(GRN)
4715
-deleteIntermediateData <- function(GRN) {
4716
-  
4717
-  checkmate::assertClass(GRN, "GRN")
4718
-  GRN = .addFunctionLogToObject(GRN)
4719
-  
4720
-  GRN = .makeObjectCompatible(GRN)
4721
-  
4722
-  for (permutationCur in 0:.getMaxPermutation(GRN)) {
4723
-    
4724
-    permIndex = as.character(permutationCur)
4725
-    GRN@data$TFs$classification[[permIndex]]$TF_cor_median_foreground = NULL
4726
-    GRN@data$TFs$classification[[permIndex]]$TF_cor_median_background = NULL
4727
-    GRN@data$TFs$classification[[permIndex]]$TF_peak_cor_foreground = NULL
4728
-    GRN@data$TFs$classification[[permIndex]]$TF_peak_cor_background = NULL
4729
-    GRN@data$TFs$classification[[permIndex]]$act.rep.thres.l = NULL
4730
-  }
4731
-  
4732
-  GRN@stats$connectionDetails.l = NULL
4733
-  
4734
-  GRN
4735
-  
4736
-  
4737
-}
4738 5122
 
4739 5123
 .checkForbiddenNames <- function(name, forbiddenNames) {
4740 5124
   
... ...
@@ -4745,74 +5129,6 @@ deleteIntermediateData <- function(GRN) {
4745 5129
 }
4746 5130
 
4747 5131
 
4748
-getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) {
4749
-  
4750
-  checkmate::assertClass(GRN, "GRN")
4751
-  GRN = .addFunctionLogToObject(GRN) 
4752
-  checkmate::assertFlag(forceRerun)
4753
-  
4754
-  # Get base mean expression for genes and TFs and mean accessibility from peaks
4755
-  
4756
-  
4757
-  # TODO: Do we need this for the shuffled one?
4758
-  
4759
-  if (is.null(GRN@visualization$metadata) | forceRerun) {
4760
-    
4761
-    expMeans.m = getCounts(GRN, type = "rna", permuted = FALSE, asMatrix = TRUE)
4762
-    
4763
-    baseMean = rowMeans(expMeans.m)
4764
-    expression.df = tibble::tibble(ENSEMBL_ID = getCounts(GRN, type = "rna", permuted = FALSE, includeIDColumn = TRUE)$ENSEMBL, baseMean = baseMean) %>%
4765
-      dplyr::mutate(ENSEMBL_ID = gsub("\\..+", "", .data$ENSEMBL_ID, perl = TRUE),
4766
-                    baseMean_log = log2(baseMean + 0.01))
4767
-    
4768
-    expression_TF.df = dplyr::filter(expression.df, .data$ENSEMBL_ID %in% GRN@annotation$TFs$TF.ENSEMBL) %>%
4769
-      dplyr::left_join(GRN@annotation$TFs, by = c("ENSEMBL_ID" = "TF.ENSEMBL"))
4770
-    
4771
-    meanPeaks.df = tibble::tibble(peakID = getCounts(GRN, type = "peaks", permuted = FALSE)$peakID, 
4772
-                                  mean = rowMeans(getCounts(GRN, type = "peaks", permuted = FALSE, asMatrix = TRUE))) %>%
4773
-      dplyr::mutate(mean_log = log2(mean + 0.01))
4774
-    
4775
-    GRN@visualization$metadata = list("RNA_expression_genes" = expression.df,
4776
-                    "RNA_expression_TF"    = expression_TF.df,
4777
-                    "Peaks_accessibility"   = meanPeaks.df)
4778
-    
4779
-  } 
4780
-  
4781
-  GRN
4782
-  
4783
-}
4784
-
4785
-#' Change the output directory of a GRN object
4786
-#' 
4787
-#' @export
4788
-#' @template GRN
4789
-#' @param outputDirectory Character. Default \code{.}. New output directory for all output files unless overwritten via the parameter \code{outputFolder}.
4790
-#' @return An updated \code{\linkS4class{GRN}} object, with the output directory being adjusted accordingly
4791
-#' @examples 
4792
-#' GRN = loadExampleObject()
4793
-#' GRN = changeOutputDirectory(GRN, outputDirectory = ".")
4794
-changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4795
-  
4796
-    start = Sys.time()  
4797
-    
4798
-    checkmate::assertClass(GRN, "GRN")
4799
-    GRN = .addFunctionLogToObject(GRN)
4800
-    
4801
-    GRN = .makeObjectCompatible(GRN)
4802
-    
4803
-    checkmate::assertCharacter(outputDirectory, len = 1, min.chars = 1)
4804
-    
4805
-    GRN@config$directories$outputRoot   = outputDirectory
4806
-    GRN@config$directories$output_plots = paste0(outputDirectory, .Platform$file.sep, "plots", .Platform$file.sep)
4807
-    GRN@config$files$output_log         = paste0(outputDirectory, .Platform$file.sep, "GRN.log")
4808
-    
4809
-    futile.logger::flog.info(paste0("Output directory changed in the object to " , outputDirectory))
4810
-    
4811
-    
4812
-    GRN
4813
-  
4814
-}
4815
-
4816 5132
 
4817 5133
 .createTables_peakGeneQC <- function(peakGeneCorrelations.all.cur, networkType_details, colors_vec, range) {
4818 5134
     
... ...
@@ -4920,6 +5236,45 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4920 5236
     paste0(names(tbl_freq), " (", tbl_freq, ")")
4921 5237
 }
4922 5238
 
5239
+
5240
+.getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) {
5241
+    
5242
+    checkmate::assertClass(GRN, "GRN")
5243
+    GRN = .addFunctionLogToObject(GRN) 
5244
+    checkmate::assertFlag(forceRerun)
5245
+    
5246
+    # Get base mean expression for genes and TFs and mean accessibility from peaks
5247
+    
5248
+    
5249
+    # TODO: Do we need this for the shuffled one?
5250
+    
5251
+    if (is.null(GRN@visualization$metadata) | forceRerun) {
5252
+        
5253
+        expMeans.m = getCounts(GRN, type = "rna", permuted = FALSE, asMatrix = TRUE)
5254
+        
5255
+        baseMean = rowMeans(expMeans.m)
5256
+        expression.df = tibble::tibble(ENSEMBL_ID = getCounts(GRN, type = "rna", permuted = FALSE, includeIDColumn = TRUE)$ENSEMBL, baseMean = baseMean) %>%
5257
+            dplyr::mutate(ENSEMBL_ID = gsub("\\..+", "", .data$ENSEMBL_ID, perl = TRUE),
5258
+                          baseMean_log = log2(baseMean + 0.01))
5259
+        
5260
+        expression_TF.df = dplyr::filter(expression.df, .data$ENSEMBL_ID %in% GRN@annotation$TFs$TF.ENSEMBL) %>%
5261
+            dplyr::left_join(GRN@annotation$TFs, by = c("ENSEMBL_ID" = "TF.ENSEMBL"))
5262
+        
5263
+        meanPeaks.df = tibble::tibble(peakID = getCounts(GRN, type = "peaks", permuted = FALSE)$peakID, 
5264
+                                      mean = rowMeans(getCounts(GRN, type = "peaks", permuted = FALSE, asMatrix = TRUE))) %>%
5265
+            dplyr::mutate(mean_log = log2(mean + 0.01))
5266
+        
5267
+        GRN@visualization$metadata = list("RNA_expression_genes" = expression.df,
5268
+                                          "RNA_expression_TF"    = expression_TF.df,
5269
+                                          "Peaks_accessibility"   = meanPeaks.df)
5270
+        
5271
+    } 
5272
+    
5273
+    GRN
5274
+    
5275
+}
5276
+
5277
+
4923 5278
 ######## Quantify source of variation ########
4924 5279
 
4925 5280
 #' Quantify and interpret multiple sources of biological and technical variation for features (TFs, peaks, and genes) in a \code{\linkS4class{GRN}} object
... ...
@@ -4946,8 +5301,8 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") {
4946 5301
 #' As noted above, the results are not added to \code{GRN@connections$all.filtered}; rerun the function \code{\link{getGRNConnections}} and set \code{include_variancePartitionResults} to \code{TRUE} to include the results in the eGRN output table.
4947 5302
 #' @examples 
4948 5303
 #' # See the Workflow vignette on the GRaNIE website for examples
4949
-#' GRN = loadExampleObject()
4950
-#' GRN = add_featureVariation(GRN, metadata = c("mt_frac"), forceRerun = TRUE)
5304
+#' # GRN = loadExampleObject()
5305
+#' # GRN = add_featureVariation(GRN, metadata = c("mt_frac"), forceRerun = TRUE)
4951 5306
 #' @export
4952 5307
 add_featureVariation <- function (GRN, 
4953 5308
                                     formula = "auto", 
... ...
@@ -5034,7 +5389,7 @@ add_featureVariation <- function (GRN,
5034 5389
                 # Numeric variables are modeled as fixed effect
5035 5390
                 formulaElems2 = paste0(numericVariablesNames)
5036 5391
                 
5037
-                message = paste0("Make sure that all variables from the metadata that are numeric are indeed really numeric and not (hidden) factors. The following variables will be treated as numeric: ", paste0(numericVariablesNames, collapse = ",", ". If one of these is in fact a factor, change the type in GRN@data$metadata and re-run, or provide the design formula manually"))
5392
+                message = paste0("Make sure that all variables from the metadata that are numeric are indeed really numeric and not (hidden) factors. The following variables will be treated as numeric: ", paste0(numericVariablesNames, collapse = ","), ". If one of these is in fact a factor, change the type in GRN@data$metadata and re-run, or provide the design formula manually")
5038 5393
                 .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE)
5039 5394
                 
5040 5395
             } else {
... ...
@@ -5134,95 +5489,7 @@ add_featureVariation <- function (GRN,
5134 5489
 }
5135 5490
 
5136 5491
 
5137
-.checkExistanceFilteredConnections <- function(GRN) {
5138
-    
5139
-    if (is.null(GRN@connections$all.filtered[["0"]])) {
5140
-        message = "Could not find filtered connections (the slot GRN@connections$all.filtered[[\"0\"]] is NULL). Run the function filterGRNAndConnectGenes."
5141
-        .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE)
5142
-    }
5143
-}
5144
-
5145
-# Converts from an older GRN object format to the most current one due to internal optimizations
5146
-.makeObjectCompatible <- function(GRN) {
5147
-    
5148
-    if (is.null(GRN@annotation$TFs) & !is.null(GRN@data$TFs$translationTable)) {
5149
-        GRN@annotation$TFs = GRN@data$TFs$translationTable
5150
-        GRN@data$TFs$translationTable = NULL
5151
-    }
5152
-    
5153
-    if (!is.null(GRN@annotation$TFs)) {
5154
-    
5155
-        if (!"TF.ENSEMBL" %in% colnames(GRN@annotation$TFs)) {
5156
-          GRN@annotation$TFs = dplyr::rename(GRN@annotation$TFs, TF.ENSEMBL = .data$ENSEMBL)
5157
-        }
5158
-        if (!"TF.HOCOID" %in% colnames(GRN@annotation$TFs)) {
5159
-          GRN@annotation$TFs = dplyr::rename(GRN@annotation$TFs, TF.HOCOID = .data$HOCOID)
5160
-        }
5161
-        
5162
-        if ("ENSEMBL" %in% colnames(GRN@annotation$TFs)) {
5163
-          GRN@annotation$TFs = dplyr::select(GRN@annotation$TFs, -.data$ENSEMBL)
5164
-        }
5165
-    }
5166
-
5167
-    
5168
-    # Temporary fix: Replace lincRNA with lncRNA due to a recent change in biomaRt until we update the object directly
5169
-    if ("lncRNA" %in% levels(GRN@annotation$genes$gene.type)) {
5170
-        GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(.data$gene.type, lncRNA = "lincRNA")) 
5171
-    }
5172
-    
5173
-    if (is.null(GRN@annotation$peaks) & !is.null(GRN@annotation$consensusPeaks)) {
5174
-        GRN@annotation[["peaks"]] = GRN@annotation[["consensusPeaks"]]
5175
-        GRN@annotation[["consensusPeaks"]] = NULL
5176
-        GRN@annotation[["peaks_obj"]] = GRN@annotation[["consensusPeaks_obj"]]
5177
-        GRN@annotation[["consensusPeaks_obj"]] = NULL
5178
-        
5179
-    }
5180
-    
5181
-    # Renamed count slots and their structure
5182
-    # 1. peaks
5183
-    if (!is.null(GRN@data$peaks[["counts_orig"]])) {
5184
-        GRN@data$peaks[["counts_orig"]] = NULL
5185
-    }
5186
-    if (is.null(GRN@data$peaks[["counts"]])) {
5187
-        GRN@data$peaks[["counts"]] = .storeAsMatrixOrSparseMatrix(GRN, df = GRN@data$peaks$counts_norm %>% dplyr::select(-.data$isFiltered), 
5188
-                                                                  ID_column = "peakID", slotName = "GRN@data$peaks$counts")
5189
-    }
5190
-    if (!is.null(GRN@data$peaks[["counts_norm"]])) {
5191
-        GRN@data$peaks[["counts_norm"]] = NULL
5192
-    }
5193
-    
5194
-    if (is.null(GRN@data$peaks[["counts_metadata"]])) {
5195
-        GRN@data$peaks[["counts_metadata"]] = .createConsensusPeaksDF(GRN@data$peaks[["counts"]] %>% as.data.frame() %>% tibble::rownames_to_column("peakID")) 
5196
-        stopifnot(c("chr", "start", "end", "peakID", "isFiltered") %in% colnames(GRN@data$peaks$counts_metadata))
5197
-    }
5198
-    if (!is.null(GRN@data$peaks[["consensusPeaks"]])) {
5199
-        GRN@data$peaks[["consensusPeaks"]] = NULL
5200
-    }
5201
-    
5202
-    # 2. RNA
5203
-    if (!is.null(GRN@data$RNA[["counts_orig"]])) {
5204
-        GRN@data$RNA[["counts_orig"]] = NULL
5205
-    }
5206
-    if (is.null(GRN@data$RNA[["counts"]]) & !is.null(GRN@data$RNA$counts_norm.l[["0"]])) {
5207
-        GRN@data$RNA[["counts"]] = .storeAsMatrixOrSparseMatrix(GRN, df = GRN@data$RNA$counts_norm.l[["0"]] %>% dplyr::select(-.data$isFiltered), 
5208
-                                                                ID_column = "ENSEMBL", slotName = "GRN@data$RNA$counts")
5209
-    }
5210
-    if (!is.null(GRN@data$RNA[["counts_norm.l"]])) {
5211
-        GRN@data$RNA[["counts_norm.l"]] = NULL
5212
-    }
5213
-    
5214
-    if (is.null(GRN@data$RNA[["counts_metadata"]]) & !is.null(GRN@data$RNA$counts)) {
5215
-        GRN@data$RNA[["counts_metadata"]] = tibble::tibble(ID = GRN@data$RNA$counts %>% as.data.frame() %>% tibble::rownames_to_column("ENSEMBL"), isFiltered = FALSE)
5216
-    }
5217
-    
5218
-    if (is.null(GRN@data$RNA[["counts_permuted_index"]]) & !is.null(GRN@data$RNA$counts)) {
5219
-        GRN@data$RNA[["counts_permuted_index"]] = .shuffleColumns(as.data.frame(GRN@data$RNA$counts), 
5220
-                                                             .getMaxPermutation(GRN), returnUnshuffled = FALSE, returnAsList = FALSE)
5221
-    }
5222 5492
 
5223
-    
5224
-    GRN
5225
-}
5226 5493
 
5227 5494
 
5228 5495
 
... ...
@@ -472,46 +472,6 @@
472 472
   GenomeInfoDb::seqlengths(txdb)
473 473
 }
474 474
 
475
-.shuffleColumns <- function(df, nPermutations, returnUnshuffled = TRUE, returnAsList = TRUE, saveMemory = TRUE) {
476
-    
477
-    start = Sys.time()
478
-   
479
-    df.shuffled.l = list()
480
-    
481
-    futile.logger::flog.info(paste0("Shuffling columns ", nPermutations, " times"))
482
-    
483
-    # Shuffle RNA-Seq data for permutations
484
-    for (permutationCur in 0:nPermutations) {
485
-        
486
-        index = as.character(permutationCur)
487
-        
488
-        if (permutationCur == 0 & returnUnshuffled) {
489
-            df.shuffled.l[[index]]  = df
490
-        } else if (permutationCur > 0) {
491
-          
492
-            sampleOrder = sample(ncol(df))
493
-            
494
-            if (saveMemory) {
495
-              df.shuffled.l[[index]] =  sampleOrder
496
-            } else {
497
-              df.shuffled.l[[index]]  = df[,sampleOrder]
498
-            }
499
-            
500
-        } else {
501
-          # nothing to do
502
-        }
503
-        
504
-    }
505
-
506
-    .printExecutionTime(start)
507
-    
508
-    if (nPermutations == 1 & !returnAsList & !returnUnshuffled) {
509
-      df.shuffled.l[["1"]]
510
-    } else {
511
-      df.shuffled.l
512
-    }
513
-    
514
-}
515 475
 
516 476
 .shuffleRowsPerColumn <- function(df) {
517 477
   
... ...
@@ -3384,9 +3384,9 @@ visualizeGRN <- function(GRN, outputFolder = NULL,  basenameOutput = NULL, plotA
3384 3384
     }
3385 3385
     
3386 3386
     # Construct GRN@visualization$metadata
3387
-    GRN = getBasic_metadata_visualization(GRN, forceRerun = forceRerun)
3387
+    GRN = .getBasic_metadata_visualization(GRN, forceRerun = forceRerun)
3388 3388
     # if (useDefaultMetadata) {
3389
-    #   metadata_visualization.l = getBasic_metadata_visualization(GRN)
3389
+    #   metadata_visualization.l = .getBasic_metadata_visualization(GRN)
3390 3390
     #   vertice_color_TFs   = list(metadata_visualization.l[["RNA_expression_TF"]],    "HOCOID",     "baseMean_log")
3391 3391
     #   vertice_color_genes = list(metadata_visualization.l[["RNA_expression_genes"]], "ENSEMBL_ID", "baseMean_log")
3392 3392
     #   vertice_color_peaks = list(metadata_visualization.l[["Peaks_accessibility"]],   "peakID",     "mean_log")
... ...
@@ -3988,135 +3988,6 @@ visualizeGRN <- function(GRN, outputFolder = NULL,  basenameOutput = NULL, plotA
3988 3988
 
3989 3989
 
3990 3990
 
3991
-plotCorrelations <- function(GRN, TF_peak.fdr.threshold = 0.2, TF_peak.r.abs.threshold = 0.3, sortby = "TF_peak.r",
3992
-                             topn = 100, TF.names = NULL, peak.IDs = NULL,
3993
-                             corMethod = "pearson") {
3994
-    
3995
-    checkmate::assertChoice(sortby, colnames(GRN@connections$TF_peaks$`0`$main))
3996
-    
3997
-    con.filt = GRN@connections$TF_peaks$`0`$main %>%
3998
-        dplyr::filter(.data$TF_peak.fdr <= TF_peak.fdr.threshold,
3999
-                      abs(.data$TF_peak.r) >= TF_peak.r.abs.threshold)
4000
-      
4001
-    
4002
-    if (!is.null(TF.names)) {
4003
-        con.filt = dplyr::filter(con.filt, .data$Tf.name %in% TF.names)
4004
-    }
4005
-    
4006
-    if (!is.null(peak.IDs)) {
4007
-        con.filt = dplyr::filter(con.filt, .data$peak.ID %in% peak.IDs)
4008
-    }
4009
-    
4010
-    
4011
-    con.filt = con.filt %>%
4012
-        dplyr::arrange_at(sortby) %>%
4013
-        dplyr::slice_head(n = topn)
4014
-    
4015
-    
4016
-    for (i in seq_len(topn)) {
4017
-        
4018
-        TF.name = con.filt$TF.name[i]
4019
-        TF.ENSEMBL = GRN@annotation$TFs$TF.ENSEMBL[which(GRN@annotation$TFs$TF.name == TF.name)]
4020
-        peak.peakID = con.filt$peak.ID[i]
4021
-        corCalc = con.filt$TF_peak.r[i]
4022
-        fdrCur = con.filt$TF_peak.fdr[i]
4023
-        
4024
-        cor_cur = cor(GRN@data$RNA$counts[TF.ENSEMBL, ], GRN@data$peaks$counts[peak.peakID,], method = corMethod)
4025
-        
4026
-        data.df = tibble(TF.exp.norm = GRN@data$RNA$counts[TF.ENSEMBL, ], peak.acc.norm = GRN@data$peaks$counts[peak.peakID,])
4027
-        
4028
-        # For testing only
4029
-        stopifnot(abs(cor_cur - corCalc) < 0.01)
4030
-        
4031
-        colorscale = scale_fill_gradientn(
4032
-            colors = RColorBrewer::brewer.pal(9, "YlGnBu"),
4033
-            values = c(0, exp(seq(-5, 0, length.out = 100))))
4034
-        
4035
-        g1 = ggplot(data.df, aes(.data$TF.exp.norm, .data$peak.acc.norm)) + 
4036
-            geom_smooth(method = "lm", formula = "y ~ x", col = "black") + 
4037
-            geom_hex(bins = 50) + colorscale  +
4038
-            xlim(-0.01,NA) + ylim(-0.01,NA) + 
4039
-            ggtitle(paste0("n = ", nrow(data.df), " (all), Cor = ", round(corCalc,2)))
4040
-        
4041
-        # Remove entries with pure zeroes
4042
-        data.filt.df = dplyr::filter(data.df, .data$TF.exp.norm > 0, .data$peak.acc.norm > 0)
4043
-        
4044
-        cor_cur = cor(dplyr::pull(data.filt.df, .data$TF.exp.norm), dplyr::pull(data.filt.df, .data$peak.acc.norm), method = corMethod)
4045
-
4046
-        g2 = ggplot(data.filt.df, aes(TF.exp.norm, peak.acc.norm)) + 
4047
-            geom_smooth(method = "lm", formula = "y ~ x", col = "black") + 
4048
-            geom_hex(bins = 50) + colorscale  +
4049
-            xlim(-0.01,NA) + ylim(-0.01,NA) + 
4050
-            ggtitle(paste0("n = ", nrow(data.filt.df), " (only >0 for both TF expr. & peak acc.), Cor = ", round(cor_cur,2)))    
4051
-        
4052
-        data.filt2.df = dplyr::filter(data.df, TF.exp.norm > 0 | peak.acc.norm > 0)
4053
-        
4054
-        cor_cur = cor(dplyr::pull(data.filt2.df, .data$TF.exp.norm), dplyr::pull(data.filt2.df, .data$peak.acc.norm), method = corMethod)
4055
-
4056
-        g3 = ggplot(data.filt2.df, aes(TF.exp.norm, peak.acc.norm)) + 
4057
-            geom_smooth(method = "lm", formula = "y ~ x", col = "black") + 
4058
-            geom_hex(bins = 50) + colorscale  +
4059
-            xlim(-0.01,NA) + ylim(-0.01,NA) + 
4060
-            ggtitle(paste0("n = ", nrow(data.filt2.df), " (only excluding (0,0) for both TF expr. & peak acc.), Cor = ", round(cor_cur,2)))    
4061
-
4062
- 
4063
-        mainTitle = paste0("TF: ", TF.name, "(", TF.ENSEMBL, "), peak: ", peak.peakID, " (FDR: ",  round(fdrCur, 2), ")")
4064
-
4065
-        plots_all =  g1 / g2 / g3 + 
4066
-            patchwork::plot_annotation(title = mainTitle, theme = ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)))
4067
-        plot(plots_all)
4068
-        # 
4069
-        # r1 = rnorm(1000)
4070
-        # r2 = rnorm(1000, sd = 1)
4071
-        # plot(r1,r2, main = paste0("Cor = ", round(cor(r1,r2, method = corMethod), 2)))
4072
-        # 
4073
-        # r1 = c(r1, rep(0,5000))
4074
-        # r2 = c(r2, rep(0,5000))
4075
-        # plot(r1,r2, main = paste0("Cor = ", round(cor(r1,r2, method = corMethod), 2)))
4076
-    }
4077
-    
4078
-    con.filt = GRN@connections$TF_peaks$`0`$main
4079
-    
4080
-    res.df = tribble(~TF.name, ~peak.peakID, ~cor_all, ~cor_nonZeroAll, ~cor_nonZero)
4081
-    for (i in seq_len(nrow(con.filt))) {
4082
-        
4083
-        if (i %% 100 == 0) futile.logger::flog.info(paste0("Row ", i, " out of ", nrow(con.filt)))
4084
-        TF.name = con.filt$TF.name[i]
4085
-        TF.ENSEMBL = GRN@annotation$TFs$TF.ENSEMBL[which(GRN@annotation$TFs$TF.name == TF.name)]
4086
-        peak.peakID = con.filt$peak.ID[i]
4087
-        corCalc = con.filt$TF_peak.r[i]
4088
-        fdrCur = con.filt$TF_peak.fdr[i]
4089
-        
4090
-        cor_cur1 = cor(GRN@data$RNA$counts[TF.ENSEMBL, ], GRN@data$peaks$counts[peak.peakID,], method = corMethod)
4091
-        
4092
-        data.df = tibble(TF.exp.norm = GRN@data$RNA$counts[TF.ENSEMBL, ], peak.acc.norm = GRN@data$peaks$counts[peak.peakID,])
4093
-        
4094
-        data.filt.df = dplyr::filter(data.df, .data$TF.exp.norm > 0, .data$peak.acc.norm > 0)
4095
-        
4096
-        cor_cur2 = cor(dplyr::pull(data.filt.df, .data$TF.exp.norm), dplyr::pull(data.filt.df, .data$peak.acc.norm), method = corMethod)
4097
-        
4098
-        data.filt2.df = dplyr::filter(data.df, .data$TF.exp.norm > 0 | peak.acc.norm > 0)
4099
-        
4100
-        cor_cur3 = cor(dplyr::pull(data.filt2.df, .data$TF.exp.norm), dplyr::pull(data.filt2.df, .data$peak.acc.norm), method = corMethod)
4101
-        
4102
-        res.df = add_row(res.df, 
4103
-                         TF.name = TF.name, peak.peakID = peak.peakID, 
4104
-                         cor_all = cor_cur1, cor_nonZeroAll = cor_cur2, cor_nonZero = cor_cur3)
4105
-    }
4106
-    cor(res.df$cor_all, res.df$cor_nonZeroAll)
4107
-    cor(res.df$cor_all, res.df$cor_nonZero)
4108
-    
4109
-    res.df = res.df %>%
4110
-        dplyr::mutate(diff_all_nonZeroAll = cor_all - cor_nonZeroAll,
4111
-                      diff_all_nonZero = cor_all - cor_nonZero)
4112
-    
4113
-    ggplot(res.df, aes(.data$diff_all_nonZeroAll)) + geom_histogram(binwidth = 0.05)
4114
-    ggplot(res.df, aes(.data$diff_all_nonZero)) + geom_histogram(binwidth = 0.05)
4115
-    
4116
-    GRN
4117
-  
4118
-}
4119
-
4120 3991
 .checkGraphExistance <- function (GRN) {
4121 3992
   
4122 3993
   if (is.null(GRN@graph$TF_peak_gene) | is.null(GRN@graph$TF_gene)) {
... ...
@@ -46,8 +46,8 @@ The normalized count matrices are used as input for \code{fitExtractVarPartModel
46 46
 }
47 47
 \examples{
48 48
 # See the Workflow vignette on the GRaNIE website for examples
49
-GRN = loadExampleObject()
50
-GRN = add_featureVariation(GRN, metadata = c("mt_frac"), forceRerun = TRUE)
49
+# GRN = loadExampleObject()
50
+# GRN = add_featureVariation(GRN, metadata = c("mt_frac"), forceRerun = TRUE)
51 51
 }
52 52
 \seealso{
53 53
 \code{\link{addData}}
... ...
@@ -4,6 +4,15 @@
4 4
 \alias{getCounts}
5 5
 \title{Get counts for the various data defined in a \code{\linkS4class{GRN}} object}
6 6
 \usage{
7
+getCounts(
8
+  GRN,
9
+  type,
10
+  permuted = FALSE,
11
+  asMatrix = FALSE,
12
+  includeIDColumn = TRUE,
13
+  includeFiltered = FALSE
14
+)
15
+
7 16
 getCounts(
8 17
   GRN,
9 18
   type,
... ...
@@ -27,9 +36,14 @@ getCounts(
27 36
 \item{includeFiltered}{Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. If set to \code{FALSE}, genes or peaks marked as filtered (after running the function \code{filterData}) will not be returned. If set to \code{TRUE}, all elements are returned regardless of the currently active filter status.}
28 37
 }
29 38
 \value{
39
+Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
40
+
30 41
 Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object.
31 42
 }
32 43
 \description{
44
+Get counts for the various data defined in a \code{\linkS4class{GRN}} object.
45
+\strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
46
+
33 47
 Get counts for the various data defined in a \code{\linkS4class{GRN}} object.
34 48
 \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
35 49
 }
... ...
@@ -37,4 +51,7 @@ Get counts for the various data defined in a \code{\linkS4class{GRN}} object.
37 51
 # See the Workflow vignette on the GRaNIE website for examples
38 52
 GRN = loadExampleObject()
39 53
 counts.df = getCounts(GRN, type = "peaks", permuted = FALSE)
54
+# See the Workflow vignette on the GRaNIE website for examples
55
+GRN = loadExampleObject()
56
+counts.df = getCounts(GRN, type = "peaks", permuted = FALSE)
40 57
 }
... ...
@@ -4,6 +4,17 @@
4 4
 \alias{getGRNConnections}
5 5
 \title{Extract connections or links from a \code{\linkS4class{GRN}} object as da data frame.}
6 6
 \usage{
7
+getGRNConnections(
8
+  GRN,
9
+  type = "all.filtered",
10
+  permuted = FALSE,
11
+  include_TF_gene_correlations = FALSE,
12
+  include_TFMetadata = FALSE,
13
+  include_peakMetadata = FALSE,
14
+  include_geneMetadata = FALSE,
15
+  include_variancePartitionResults = FALSE
16
+)
17
+
7 18
 getGRNConnections(
8 19
   GRN,
9 20
   type = "all.filtered",
... ...
@@ -36,9 +47,14 @@ If set to \code{TRUE}, they must have been computed beforehand with \code{\link{
36 47
 Only relevant for type = "all.filtered"}
37 48
 }
38 49
 \value{
50
+A data frame with the requested connections. This function does **NOT** return a \code{\linkS4class{GRN}} object.
51
+
39 52
 A data frame with the requested connections. This function does **NOT** return a \code{\linkS4class{GRN}} object.
40 53
 }
41 54
 \description{
55
+Returns stored connections/links (either TF-peak, peak-genes or the filtered set of connections as produced by \code{\link{filterGRNAndConnectGenes}}). 
56
+\strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
57
+
42 58
 Returns stored connections/links (either TF-peak, peak-genes or the filtered set of connections as produced by \code{\link{filterGRNAndConnectGenes}}). 
43 59
 \strong{Note: This function, as all \code{get} functions from this package, does NOT return a \code{\linkS4class{GRN}} object.}
44 60
 }
... ...
@@ -46,11 +62,20 @@ Returns stored connections/links (either TF-peak, peak-genes or the filtered set
46 62
 # See the Workflow vignette on the GRaNIE website for examples
47 63
 GRN = loadExampleObject()
48 64
 GRN_con.all.df = getGRNConnections(GRN)
65
+# See the Workflow vignette on the GRaNIE website for examples
66
+GRN = loadExampleObject()
67
+GRN_con.all.df = getGRNConnections(GRN)
49 68
 }
50 69
 \seealso{
51 70
 \code{\link{filterGRNAndConnectGenes}}
52 71
 
53 72
 \code{\link{add_featureVariation}}
54 73
 
74
+\code{\link{add_TF_gene_correlation}}
75
+
76
+\code{\link{filterGRNAndConnectGenes}}
77
+
78
+\code{\link{add_featureVariation}}
79
+
55 80
 \code{\link{add_TF_gene_correlation}}
56 81
 }