... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: GRaNIE |
2 | 2 |
Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data |
3 |
-Version: 1.1.14 |
|
3 |
+Version: 1.1.15 |
|
4 | 4 |
Encoding: UTF-8 |
5 | 5 |
Authors@R: c(person("Christian", "Arnold", email = |
6 | 6 |
"chrarnold@web.de", role = c("cre","aut")), |
... | ... |
@@ -3,7 +3,7 @@ |
3 | 3 |
#' Genetic variants associated with diseases often affect non-coding regions, thus likely having a regulatory role. To understand the effects of genetic variants in these regulatory regions, identifying genes that are modulated by specific regulatory elements (REs) is crucial. The effect of gene regulatory elements, such as enhancers, is often cell-type specific, likely because the combinations of transcription factors (TFs) that are regulating a given enhancer have celltype specific activity. This TF activity can be quantified with existing tools such as \code{diffTF} and captures differences in binding of a TF in open chromatin regions. Collectively, this forms a gene regulatory network (\code{eGRN}) with cell-type and data-specific TF-RE and RE-gene links. Here, we reconstruct such a \code{eGRN} using bulk RNAseq and open chromatin (e.g., using ATACseq or ChIPseq for open chromatin marks) and optionally TF activity data. Our network contains different types of links, connecting TFs to regulatory elements, the latter of which is connected to genes in the vicinity or within the same chromatin domain (TAD). We use a statistical framework to assign empirical FDRs and weights to all links using a permutation-based approach. |
4 | 4 |
#' |
5 | 5 |
#' @section Package functions: |
6 |
-#' See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE/articles/} for all project-related information. |
|
6 |
+#' See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE} for all project-related information. |
|
7 | 7 |
#' |
8 | 8 |
#' @section GRN object: |
9 | 9 |
#' The \code{GRaNIE} package works with \code{GRN} objects. See \code{\linkS4class{GRN}} for details. |
... | ... |
@@ -39,8 +39,14 @@ initializeGRN <- function(objectMetadata = list(), |
39 | 39 |
.checkAndLoadPackagesGenomeAssembly(genomeAssembly) |
40 | 40 |
|
41 | 41 |
# Create the folder first if not yet existing |
42 |
+ checkmate::assertCharacter(outputFolder, min.chars = 1, len = 1) |
|
42 | 43 |
if (!dir.exists(outputFolder)) { |
43 |
- dir.create(outputFolder) |
|
44 |
+ res = dir.create(outputFolder) |
|
45 |
+ if (!res) { |
|
46 |
+ message = paste0("Could not create the output directory ", outputFolder, ". Check the path /and/or access rights.") |
|
47 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
48 |
+ } |
|
49 |
+ checkmate::assertDirectoryExists(outputFolder, access = "w") |
|
44 | 50 |
} |
45 | 51 |
|
46 | 52 |
# Create an absolute path out of the given outputFolder now that it exists |
... | ... |
@@ -109,6 +115,9 @@ initializeGRN <- function(objectMetadata = list(), |
109 | 115 |
|
110 | 116 |
futile.logger::flog.info(paste0("Empty GRN object created successfully. Type the object name (e.g., GRN) to retrieve summary information about it at any time.")) |
111 | 117 |
|
118 |
+ futile.logger::flog.info(paste0(" Default output folder: ", GRN@config$directories$outputRoot)) |
|
119 |
+ futile.logger::flog.info(paste0(" Genome assembly: ", genomeAssembly)) |
|
120 |
+ |
|
112 | 121 |
.printExecutionTime(start, prefix = "") |
113 | 122 |
|
114 | 123 |
GRN |
... | ... |
@@ -187,11 +196,17 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
187 | 196 |
# Normalize ID column names |
188 | 197 |
if (idColumn_peaks != "peakID") { |
189 | 198 |
counts_peaks = dplyr::rename(counts_peaks, peakID = !!(idColumn_peaks)) |
199 |
+ idColumn_peaks ="peakID" |
|
190 | 200 |
} |
191 | 201 |
if (idColumn_RNA != "ENSEMBL") { |
192 | 202 |
counts_rna = dplyr::rename(counts_rna, ENSEMBL = !!(idColumn_RNA)) |
203 |
+ idColumn_RNA = "ENSEMBL" |
|
193 | 204 |
} |
194 |
- |
|
205 |
+ |
|
206 |
+ # Check existence of correct ID column now |
|
207 |
+ checkmate::assertSubset(idColumn_peaks, colnames(counts_peaks)) |
|
208 |
+ checkmate::assertSubset(idColumn_RNA, colnames(counts_rna)) |
|
209 |
+ |
|
195 | 210 |
# Check ID columns for missing values and remove |
196 | 211 |
rna_missing_ID = which(is.na(counts_rna$ENSEMBL)) |
197 | 212 |
if (length(rna_missing_ID) > 0) { |
... | ... |
@@ -257,8 +272,11 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
257 | 272 |
GRN@data$peaks$counts_norm = data.l[["peaks"]] %>% dplyr::mutate(isFiltered = FALSE) |
258 | 273 |
countsRNA.norm.df = data.l[["RNA"]] %>% dplyr::mutate(isFiltered = FALSE) |
259 | 274 |
|
275 |
+ futile.logger::flog.info(paste0( "Final dimensions of data:")) |
|
276 |
+ futile.logger::flog.info(paste0( " RNA : ", nrow(countsRNA.norm.df) , " x ", ncol(countsRNA.norm.df) - 2, " (rows x columns)")) |
|
277 |
+ futile.logger::flog.info(paste0( " peaks: ", nrow(countsPeaks.norm.df), " x ", ncol(countsPeaks.norm.df) - 1, " (rows x columns)")) |
|
260 | 278 |
# Create permutations for RNA |
261 |
- futile.logger::flog.info(paste0( "Produce ", .getMaxPermutation(GRN), " permutations of RNA-counts")) |
|
279 |
+ futile.logger::flog.info(paste0( "Generate ", .getMaxPermutation(GRN), " permutations of RNA-counts")) |
|
262 | 280 |
GRN@data$RNA$counts_norm.l = .shuffleColumns(countsRNA.norm.df, .getMaxPermutation(GRN), returnUnshuffled = TRUE, returnAsList = TRUE) |
263 | 281 |
|
264 | 282 |
if (!is.null(sampleMetadata)) { |
... | ... |
@@ -341,7 +359,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
341 | 359 |
|
342 | 360 |
overlappingPeaks = which(GenomicRanges::countOverlaps(consensus.gr ,consensus.gr) >1) |
343 | 361 |
|
344 |
- if (length(overlappingPeaks) > 0){ |
|
362 |
+ if (length(overlappingPeaks) > 0) { |
|
345 | 363 |
|
346 | 364 |
ids = (consensus.gr[overlappingPeaks] %>% as.data.frame())$peakID |
347 | 365 |
|
... | ... |
@@ -574,11 +592,12 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
574 | 592 |
} |
575 | 593 |
|
576 | 594 |
.populateGeneAnnotation <- function (GRN) { |
577 |
- |
|
578 |
- futile.logger::flog.info(paste0(" Calculate statistics for each gene (mean and CV)")) |
|
579 |
- |
|
595 |
+ |
|
596 |
+ |
|
580 | 597 |
countsRNA.clean = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE) |
581 | 598 |
|
599 |
+ futile.logger::flog.info(paste0(" Calculate statistics for each of the ", nrow(countsRNA.clean), " genes that were provided with the RNA-seq data (mean and CV)")) |
|
600 |
+ |
|
582 | 601 |
countsRNA.m = as.matrix(dplyr::select(countsRNA.clean, -.data$ENSEMBL)) |
583 | 602 |
|
584 | 603 |
rowMeans_rna = rowMeans(countsRNA.m) |
... | ... |
@@ -591,7 +610,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
591 | 610 |
gene.mean = rowMeans_rna, |
592 | 611 |
gene.median = rowMedians_rna, |
593 | 612 |
gene.CV = CV_rna) %>% |
594 |
- dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>% |
|
613 |
+ dplyr::left_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>% |
|
595 | 614 |
dplyr::mutate(gene.type = forcats::fct_explicit_na(.data$gene.type, na_level = "unknown/missing")) |
596 | 615 |
|
597 | 616 |
GRN@annotation$genes = metadata_rna |
... | ... |
@@ -704,6 +723,8 @@ filterData <- function (GRN, |
704 | 723 |
checkmate::assertNumber(maxNormalizedMean_peaks, lower = minNormalizedMean_peaks , null.ok = TRUE) |
705 | 724 |
checkmate::assertNumber(maxNormalizedMeanRNA, lower = minNormalizedMeanRNA, null.ok = TRUE) |
706 | 725 |
checkmate::assertCharacter(chrToKeep_peaks, min.len = 1, any.missing = FALSE, null.ok = TRUE) |
726 |
+ checkmate::assertSubset(chrToKeep_peaks, GRN@data$peaks$consensusPeaks %>% dplyr::pull(.data$chr) %>% unique() %>% as.character()) |
|
727 |
+ |
|
707 | 728 |
checkmate::assertIntegerish(minSize_peaks, lower = 1, null.ok = TRUE) |
708 | 729 |
checkmate::assertIntegerish(maxSize_peaks, lower = dplyr::if_else(is.null(minSize_peaks), 1, minSize_peaks), null.ok = TRUE) |
709 | 730 |
checkmate::assertNumber(minCV_peaks, lower = 0, null.ok = TRUE) |
... | ... |
@@ -739,6 +760,11 @@ filterData <- function (GRN, |
739 | 760 |
futile.logger::flog.info(paste0("Collectively, filter ", nPeaksBefore -length(peaks_toKeep), " out of ", nPeaksBefore, " peaks.")) |
740 | 761 |
futile.logger::flog.info(paste0("Number of remaining peaks: ", length(peaks_toKeep))) |
741 | 762 |
|
763 |
+ if (length(peaks_toKeep) < 1000) { |
|
764 |
+ message = paste0("Too few peaks (", length(peaks_toKeep), ") remain after filtering. At least 1000 peaks must remain. Adjust the filtering settings.") |
|
765 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
766 |
+ } |
|
767 |
+ |
|
742 | 768 |
GRN@data$peaks$consensusPeaks$isFiltered = ! GRN@data$peaks$consensusPeaks$peakID %in% peaks_toKeep |
743 | 769 |
#GRN@data$peaks$counts_raw$isFiltered = ! GRN@data$peaks$counts_raw$peakID %in% peaks_toKeep |
744 | 770 |
GRN@data$peaks$counts_norm$isFiltered = ! GRN@data$peaks$counts_norm$peakID %in% peaks_toKeep |
... | ... |
@@ -757,7 +783,12 @@ filterData <- function (GRN, |
757 | 783 |
minMean = minNormalizedMeanRNA, maxMean = maxNormalizedMeanRNA, |
758 | 784 |
minCV = minCV_genes, maxCV = maxCV_genes) |
759 | 785 |
|
760 |
- futile.logger::flog.info(paste0(" Number of rows in total: ", nrow(GRN@data$RNA$counts_norm.l[["0"]]))) |
|
786 |
+ |
|
787 |
+ if (length(genes.CV) < 100) { |
|
788 |
+ message = paste0("Too few genes (", length(genes.CV), ") remain after filtering. At least 100 genes must remain. Adjust the filtering settings.") |
|
789 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
790 |
+ } |
|
791 |
+ |
|
761 | 792 |
for (permutationCur in c(0)) { |
762 | 793 |
permIndex = as.character(permutationCur) |
763 | 794 |
rowMeans = rowMeans(dplyr::select(GRN@data$RNA$counts_norm.l[[permIndex]], -.data$ENSEMBL)) |
... | ... |
@@ -768,8 +799,6 @@ filterData <- function (GRN, |
768 | 799 |
# Raw counts are left untouched and filtered where needed only |
769 | 800 |
futile.logger::flog.info(paste0(" Flagged ", nRowsFlagged, " rows because the row mean was smaller than ", minNormalizedMeanRNA)) |
770 | 801 |
|
771 |
- # TODO: Filter genes by CV |
|
772 |
- |
|
773 | 802 |
GRN@config$isFiltered = TRUE |
774 | 803 |
} |
775 | 804 |
|
... | ... |
@@ -1093,7 +1122,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1093 | 1122 |
if (is.null(GRN@data$TFs$TF_peak_overlap) | forceRerun) { |
1094 | 1123 |
|
1095 | 1124 |
|
1096 |
- futile.logger::flog.info(paste0("Overlap peaks and TFBS using ", nCores, " cores. This may take a few minutes...")) |
|
1125 |
+ futile.logger::flog.info(paste0("Overlap peaks and TFBS using ", nCores, " cores. This may take a while, particularly if the number of samples is large...")) |
|
1097 | 1126 |
|
1098 | 1127 |
genomeAssembly = GRN@config$parameters$genomeAssembly |
1099 | 1128 |
seqlengths = .getChrLengths(genomeAssembly) |
... | ... |
@@ -1254,17 +1283,25 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1254 | 1283 |
|
1255 | 1284 |
start = Sys.time() |
1256 | 1285 |
|
1286 |
+ # Set the column name to just ENSEMBL to avoid column mismatch issues |
|
1287 |
+ HOCOMOCO_mapping$ENSEMBL = HOCOMOCO_mapping$TF.ENSEMBL |
|
1288 |
+ |
|
1257 | 1289 |
# Filter to only the TFs |
1258 | 1290 |
# In addition, the no of TF because multiple TFs can map to the same gene/ ENSEMBL ID |
1259 | 1291 |
# Also filter 0 count genes because they otherwise cause errors downstream |
1260 | 1292 |
rowSums = rowSums(dplyr::select(matrix1, -.data$ENSEMBL)) |
1261 | 1293 |
|
1262 | 1294 |
# Keep only Ensembl IDs from TFs we have data from |
1263 |
- matrix1.norm.TFs.df = dplyr::filter(matrix1, .data$ENSEMBL %in% HOCOMOCO_mapping$ENSEMBL, rowSums != 0) |
|
1295 |
+ matrix1.norm.TFs.df = dplyr::filter(matrix1, .data$ENSEMBL %in% HOCOMOCO_mapping$TF.ENSEMBL, rowSums != 0) |
|
1296 |
+ |
|
1297 |
+ nFiltered1 = dplyr::filter(matrix1, ! .data$ENSEMBL %in% HOCOMOCO_mapping$TF.ENSEMBL) %>% nrow() |
|
1298 |
+ nFiltered2 = dplyr::filter(matrix1, rowSums == 0) %>% nrow() |
|
1264 | 1299 |
|
1265 | 1300 |
diff = nrow(matrix1) - nrow(matrix1.norm.TFs.df) |
1266 | 1301 |
if (diff > 0) { |
1267 |
- message = paste0(whitespacePrefix, "Retain ", nrow(matrix1.norm.TFs.df), " rows from TF/gene data out of ", nrow(matrix1), " (filter non-TF genes and TF genes with 0 counts throughout and keep only unique ENSEMBL IDs).") |
|
1302 |
+ message = paste0(whitespacePrefix, "Retain ", nrow(matrix1.norm.TFs.df), " unique genes from TF/gene data out of ", nrow(matrix1), |
|
1303 |
+ " (filter ", nFiltered1, " non-TF genes and ", nFiltered2, |
|
1304 |
+ " TF genes with 0 counts throughout).") |
|
1268 | 1305 |
futile.logger::flog.info(message) |
1269 | 1306 |
} |
1270 | 1307 |
|
... | ... |
@@ -1273,7 +1310,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1273 | 1310 |
.checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
1274 | 1311 |
} |
1275 | 1312 |
|
1276 |
- HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, .data$ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL) |
|
1313 |
+ HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, .data$TF.ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL) |
|
1277 | 1314 |
futile.logger::flog.info(paste0(whitespacePrefix, "Correlate TF/gene data for ", nrow(matrix1.norm.TFs.df), " unique Ensembl IDs (TFs) and peak counts for ", nrow(matrix_peaks), " peaks.")) |
1278 | 1315 |
futile.logger::flog.info(paste0(whitespacePrefix, "Note: For subsequent steps, the same gene may be associated with multiple TF, depending on the translation table.")) |
1279 | 1316 |
# Correlate TF gene counts with peak counts |
... | ... |
@@ -1301,10 +1338,10 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1301 | 1338 |
# Reorder to make sure the order is the same. Due to the duplication ID issue, the number of columns may increase after the column selection |
1302 | 1339 |
|
1303 | 1340 |
# Some columns may be removed here due to zero standard deviation |
1304 |
- HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(.data$ENSEMBL %in% colnames(sort.cor.m)) |
|
1341 |
+ HOCOMOCO_mapping.exp.filt = HOCOMOCO_mapping.exp %>% dplyr::filter(.data$TF.ENSEMBL %in% colnames(sort.cor.m)) |
|
1305 | 1342 |
|
1306 | 1343 |
sort.cor.m = sort.cor.m[,as.character(HOCOMOCO_mapping.exp.filt$ENSEMBL)] |
1307 |
- colnames(sort.cor.m) = as.character(HOCOMOCO_mapping.exp.filt$HOCOID) |
|
1344 |
+ colnames(sort.cor.m) = as.character(HOCOMOCO_mapping.exp.filt$TF.HOCOID) |
|
1308 | 1345 |
|
1309 | 1346 |
.printExecutionTime(start, prefix = whitespacePrefix) |
1310 | 1347 |
sort.cor.m |
... | ... |
@@ -2007,14 +2044,16 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2007 | 2044 |
corMethod, |
2008 | 2045 |
whitespacePrefix = " ") |
2009 | 2046 |
|
2010 |
- # TODO:Check here for the TFs |
|
2047 |
+ # TODO:Check here for the TFs BMAL1.0.A |
|
2011 | 2048 |
allTF = intersect(colnames(peak_TF_overlap.df), colnames(peaksCor.m)) |
2049 |
+ checkmate::assertIntegerish(length(allTF), lower = 1) |
|
2050 |
+ |
|
2012 | 2051 |
futile.logger::flog.info(paste0(" Run FDR calculations for ", length(allTF), " TFs for which TFBS predictions and ", |
2013 | 2052 |
connectionTypeCur, " data for the corresponding gene are available.")) |
2014 | 2053 |
if (length(allTF) < ncol(peak_TF_overlap.df) | length(allTF) < ncol(peaksCor.m)) { |
2015 | 2054 |
|
2016 | 2055 |
TF_missing = setdiff(colnames(peak_TF_overlap.df), colnames(peaksCor.m)) |
2017 |
- if (length(TF_missing) > 0) futile.logger::flog.info(paste0(" Skip the following ", length(TF_missing), " TF due to missing data: ", paste0(TF_missing, collapse = ","))) |
|
2056 |
+ if (length(TF_missing) > 0) futile.logger::flog.info(paste0(" Skip the following ", length(TF_missing), " TF due to missing dataor because they are marked as filtered: ", paste0(TF_missing, collapse = ","))) |
|
2018 | 2057 |
} |
2019 | 2058 |
|
2020 | 2059 |
stopifnot(nrow(peaksCor.m) == nrow(peak_TF_overlap.df)) |
... | ... |
@@ -160,7 +160,9 @@ |
160 | 160 |
countsRNA.df = dplyr::select(countsRNA, tidyselect::all_of(c(idColumn_RNA, sharedColumns))) |
161 | 161 |
countsPeaks.df = dplyr::select(countsPeaks, tidyselect::all_of(c(idColumn_peaks, sharedColumns))) |
162 | 162 |
|
163 |
- futile.logger::flog.info(paste0(" ", length(sharedColumns), " samples (", paste0(sharedColumns, collapse = ","), ") are shared between the peaks and RNA-Seq data")) |
|
163 |
+ futile.logger::flog.info(paste0(" ", length(sharedColumns), " samples (", |
|
164 |
+ dplyr::if_else(length(sharedColumns) > 100, "names omitted here", paste0(sharedColumns, collapse = ",")), |
|
165 |
+ ") are shared between the peaks and RNA-Seq data")) |
|
164 | 166 |
|
165 | 167 |
notIntersecting_peaks = setdiff(colnames(countsPeaks),c(sharedColumns,idColumn_peaks)) |
166 | 168 |
notIntersecting_RNA = setdiff(colnames(countsRNA),c(sharedColumns,idColumn_RNA)) |
... | ... |
@@ -998,6 +998,12 @@ calculateCommunitiesEnrichment <- function(GRN, |
998 | 998 |
.checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
999 | 999 |
} |
1000 | 1000 |
|
1001 |
+ if (length(selCommunities) < length(communities)) { |
|
1002 |
+ missingCommunities = setdiff(communities, selCommunities) |
|
1003 |
+ message = paste0("Some of the requested communities (", paste0(missingCommunities , collapse = ","), ") were not found. Only the following communities are available: ", paste0(selCommunities, collapse = ",")) |
|
1004 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
|
1005 |
+ } |
|
1006 |
+ |
|
1001 | 1007 |
selCommunities |
1002 | 1008 |
|
1003 | 1009 |
} |
... | ... |
@@ -326,7 +326,9 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
326 | 326 |
plot(g) |
327 | 327 |
|
328 | 328 |
}, error = function(e) { |
329 |
- futile.logger::flog.warn(paste0(" Could not plot PCA with variable ", varCur)) |
|
329 |
+ message = paste0(" Could not plot PCA with variable ", varCur) |
|
330 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
|
331 |
+ |
|
330 | 332 |
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n') |
331 | 333 |
message = paste0(" Could not plot PCA with variable ", varCur) |
332 | 334 |
text(x = 0.5, y = 0.5, message, cex = 1.6, col = "red") |
... | ... |
@@ -493,7 +495,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
493 | 495 |
#' @template basenameOutput |
494 | 496 |
#' @template plotDetails |
495 | 497 |
#' @param dataType Character vector. One of, or both of, \code{"real"} or \code{"permuted"}. For which data type, real or permuted data, to produce the diagnostic plots? |
496 |
-#' @param nTFMax \code{NULL} or Integer. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10) |
|
498 |
+#' @param nTFMax \code{NULL} or Integer > 0. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10) |
|
497 | 499 |
#' @template plotAsPDF |
498 | 500 |
#' @template pdf_width |
499 | 501 |
#' @param pdf_height_base Number. Default 8. Base height of the PDF, in cm, per connection type. The total height is automatically determined based on the number of connection types that are found in the object (e.g., expression or TF activity). For example, when two connection types are found, the base height is multiplied by 2. |
... | ... |
@@ -524,7 +526,7 @@ plotDiagnosticPlots_TFPeaks <- function(GRN, |
524 | 526 |
checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE)) |
525 | 527 |
checkmate::assertFlag(plotDetails) |
526 | 528 |
checkmate::assertSubset(dataType, c("real", "permuted"), empty.ok = FALSE) |
527 |
- checkmate::assert(checkmate::checkNull(nTFMax), checkmate::checkIntegerish(nTFMax)) |
|
529 |
+ checkmate::assert(checkmate::checkNull(nTFMax), checkmate::checkIntegerish(nTFMax, lower = 1)) |
|
528 | 530 |
checkmate::assertFlag(plotAsPDF) |
529 | 531 |
checkmate::assertNumeric(pdf_width, lower = 5, upper = 99) |
530 | 532 |
checkmate::assertNumeric(pdf_height_base, lower = 5, upper = 99) |
... | ... |
@@ -917,6 +919,10 @@ plotDiagnosticPlots_peakGene <- function(GRN, |
917 | 919 |
checkmate::assert(checkmate::checkNull(outputFolder), checkmate::checkCharacter(outputFolder, min.chars = 1)) |
918 | 920 |
checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE)) |
919 | 921 |
checkmate::assertList(gene.types, any.missing = FALSE, min.len = 1, types = "character") |
922 |
+ for (geneTypesCur in gene.types) { |
|
923 |
+ checkmate::assertSubset(geneTypesCur, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
|
924 |
+ } |
|
925 |
+ |
|
920 | 926 |
checkmate::assertFlag(useFiltered) |
921 | 927 |
checkmate::assertFlag(plotDetails) |
922 | 928 |
checkmate::assertFlag(plotPerTF) |
... | ... |
@@ -1122,6 +1128,9 @@ plotDiagnosticPlots_peakGene <- function(GRN, |
1122 | 1128 |
filenameCur = paste0(fileBase, paste0(geneTypesSelected, collapse = "+"), filteredStr, ".pdf") |
1123 | 1129 |
.checkOutputFile(filenameCur) |
1124 | 1130 |
grDevices::pdf(file = filenameCur, width = pdf_width, height = pdf_height) |
1131 |
+ |
|
1132 |
+ futile.logger::flog.info(paste0(" Plotting to file ", filenameCur)) |
|
1133 |
+ |
|
1125 | 1134 |
} |
1126 | 1135 |
|
1127 | 1136 |
if (plotPerTF) { |
... | ... |
@@ -2342,8 +2351,8 @@ plotGeneralEnrichment <- function(GRN, outputFolder = NULL, basenameOutput = NUL |
2342 | 2351 |
#' @inheritParams plotCommunitiesEnrichment |
2343 | 2352 |
#' @param communities Numeric vector. Default \code{seq_len(10)}. Depending on what was specified in the \code{display} parameter, this parameter would indicate either the rank or the label of the communities to be plotted. i.e. for \code{communities = c(1,4)}, if \code{display = "byRank"} the results for the first and fourth largest communities will be plotted. if \code{display = "byLabel"}, the results for the communities labeled \code{"1"}, and \code{"4"} will be plotted. If set to \code{NULL}, all communities will be plotted |
2344 | 2353 |
#' @template forceRerun |
2345 |
-#' @param topnGenes Integer. Default 20. Number of genes to plot, sorted by their rank or label. |
|
2346 |
-#' @param topnTFs Integer. Default 20. Number of TFs to plot, sorted by their rank or label. |
|
2354 |
+#' @param topnGenes Integer > 0. Default 20. Number of genes to plot, sorted by their rank or label. |
|
2355 |
+#' @param topnTFs Integer > 0. Default 20. Number of TFs to plot, sorted by their rank or label. |
|
2347 | 2356 |
#' @return The same \code{\linkS4class{GRN}} object, without modifications. |
2348 | 2357 |
#' @seealso \code{\link{plotGeneralGraphStats}} |
2349 | 2358 |
#' @seealso \code{\link{calculateCommunitiesStats}} |
... | ... |
@@ -2354,7 +2363,7 @@ plotGeneralEnrichment <- function(GRN, outputFolder = NULL, basenameOutput = NUL |
2354 | 2363 |
#' GRN = plotCommunitiesStats(GRN, plotAsPDF = FALSE, pages = 1) |
2355 | 2364 |
#' @export |
2356 | 2365 |
plotCommunitiesStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
2357 |
- display = "byRank", communities = seq_len(10), |
|
2366 |
+ display = "byRank", communities = seq_len(5), |
|
2358 | 2367 |
topnGenes = 20, topnTFs = 20, |
2359 | 2368 |
plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, pages = NULL, |
2360 | 2369 |
forceRerun = FALSE){ |
... | ... |
@@ -2368,8 +2377,8 @@ plotCommunitiesStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL |
2368 | 2377 |
checkmate::assert(checkmate::checkNull(outputFolder), checkmate::checkCharacter(outputFolder, min.chars = 1)) |
2369 | 2378 |
checkmate::assert(checkmate::checkNull(basenameOutput), checkmate::checkCharacter(basenameOutput, len = 1, min.chars = 1, any.missing = FALSE)) |
2370 | 2379 |
|
2371 |
- checkmate::assertIntegerish(topnGenes) |
|
2372 |
- checkmate::assertIntegerish(topnTFs) |
|
2380 |
+ checkmate::assertIntegerish(topnGenes, lower = 1) |
|
2381 |
+ checkmate::assertIntegerish(topnTFs, lower = 1) |
|
2373 | 2382 |
checkmate::assertChoice(display , c("byRank", "byLabel")) |
2374 | 2383 |
checkmate::assert(checkmate::checkNull(communities), checkmate::checkNumeric(communities, lower = 1, any.missing = FALSE, min.len = 1)) |
2375 | 2384 |
checkmate::assertFlag(plotAsPDF) |
... | ... |
@@ -2542,8 +2551,8 @@ plotCommunitiesStats <- function(GRN, outputFolder = NULL, basenameOutput = NULL |
2542 | 2551 |
#' @inheritParams plotGeneralEnrichment |
2543 | 2552 |
#' @param display Character. Default \code{"byRank"}. One of: \code{"byRank"}, \code{"byLabel"}. Specify whether the communities will be displayed based on their rank, where the largest community (with most vertices) would have a rank of 1, or by their label. Note that the label is independent of the rank. |
2544 | 2553 |
#' @param communities \code{NULL} or numeric vector. Default \code{NULL}. If set to \code{NULL}, the default, all communities enrichments that have been calculated before are plotted. If a numeric vector is specified: Depending on what was specified in the \code{display} parameter, this parameter indicates either the rank or the label of the communities to be plotted. i.e. for \code{communities = c(1,4)}, if \code{display = "byRank"} the results for the first and fourth largest communities are plotted. if \code{display = "byLabel"}, the results for the communities labeled \code{"1"}, and \code{"4"} are plotted. |
2545 |
-#' @param nSignificant Numeric >0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes. |
|
2546 |
-#' @param nID Numeric >0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment. |
|
2554 |
+#' @param nSignificant Numeric > 0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes. |
|
2555 |
+#' @param nID Numeric > 0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment. |
|
2547 | 2556 |
#' @template maxWidth_nchar_plot |
2548 | 2557 |
#' @return The same \code{\linkS4class{GRN}} object, without modifications. |
2549 | 2558 |
#' @seealso \code{\link{plotGeneralEnrichment}} |
... | ... |
@@ -2921,8 +2930,8 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL |
2921 | 2930 |
checkmate::assert(checkmate::checkNull(n), checkmate::checkNumeric(n)) |
2922 | 2931 |
checkmate::assertNumeric(p, lower = 0, upper = 1) |
2923 | 2932 |
checkmate::assertNumeric(topn_pvalue, lower = 1) |
2924 |
- checkmate::assertNumeric(nSignificant, lower = 0) |
|
2925 |
- checkmate::assertNumeric(nID, lower = 0) |
|
2933 |
+ checkmate::assertNumeric(nSignificant, lower = 1) |
|
2934 |
+ checkmate::assertNumeric(nID, lower = 1) |
|
2926 | 2935 |
checkmate::assertFlag(plotAsPDF) |
2927 | 2936 |
checkmate::assertNumeric(pdf_width, lower = 5, upper = 99) |
2928 | 2937 |
checkmate::assertNumeric(pdf_height, lower = 5, upper = 99) |
... | ... |
@@ -2938,13 +2947,16 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL |
2938 | 2947 |
outputFolder = .checkOutputFolder(GRN, outputFolder) |
2939 | 2948 |
|
2940 | 2949 |
|
2941 |
- if (rankType == "custom"){ |
|
2950 |
+ if (rankType == "custom") { |
|
2942 | 2951 |
if(is.null(TF.names)){ |
2943 |
- futile.logger::flog.error("To plot the GO enrichment for a custom set of TFs, you must provide the TF names in the 'TF.names' parameter.") |
|
2952 |
+ message = "To plot the GO enrichment for a custom set of TFs, you must provide the TF names in the 'TF.names' parameter." |
|
2953 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
2944 | 2954 |
} |
2945 | 2955 |
wrongTFs = setdiff(TF.names, unique(GRN@connections$all.filtered$`0`$TF.name)) |
2946 | 2956 |
if (length(wrongTFs)>0){ |
2947 |
- futile.logger::flog.warn(paste0("The TFs ", paste0(wrongTFs, collapse = " + "), " are not in the filtered GRN. They will be ommited from the results.")) |
|
2957 |
+ message = paste0("The TFs ", paste0(wrongTFs, collapse = " + "), " are not in the filtered GRN. They will be ommited from the results.") |
|
2958 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
|
2959 |
+ |
|
2948 | 2960 |
} |
2949 | 2961 |
TFset = setdiff(TF.names, wrongTFs) |
2950 | 2962 |
} else{ #rankType = "degree" |
... | ... |
@@ -3467,7 +3479,8 @@ visualizeGRN <- function(GRN, outputFolder = NULL, basenameOutput = NULL, plotA |
3467 | 3479 |
|
3468 | 3480 |
if (nrow(grn.merged) == 0) { |
3469 | 3481 |
|
3470 |
- futile.logger::flog.warn(paste0("No rows left in the GRN. Creating empty plot.")) |
|
3482 |
+ message = paste0("No rows left in the GRN. Creating empty plot.") |
|
3483 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
|
3471 | 3484 |
|
3472 | 3485 |
plot(c(0, 1), c(0, 1), ann = FALSE, bty = 'n', type = 'n', xaxt = 'n', yaxt = 'n', main = title) |
3473 | 3486 |
message = paste0(title, "\n\nThe GRN has no edges that pass the filter criteria.") |
... | ... |
@@ -43,9 +43,9 @@ As they are listed under \code{Suggests}, they may not yet be installed, and the |
43 | 43 |
|
44 | 44 |
\item{display}{Character. Default \code{"byRank"}. One of: \code{"byRank"}, \code{"byLabel"}. Specify whether the communities will be displayed based on their rank, where the largest community (with most vertices) would have a rank of 1, or by their label. Note that the label is independent of the rank.} |
45 | 45 |
|
46 |
-\item{topnGenes}{Integer. Default 20. Number of genes to plot, sorted by their rank or label.} |
|
46 |
+\item{topnGenes}{Integer > 0. Default 20. Number of genes to plot, sorted by their rank or label.} |
|
47 | 47 |
|
48 |
-\item{topnTFs}{Integer. Default 20. Number of TFs to plot, sorted by their rank or label.} |
|
48 |
+\item{topnTFs}{Integer > 0. Default 20. Number of TFs to plot, sorted by their rank or label.} |
|
49 | 49 |
|
50 | 50 |
\item{maxWidth_nchar_plot}{Integer (>=10). Default 50. Maximum number of characters for a term before it is truncated.} |
51 | 51 |
|
... | ... |
@@ -9,7 +9,7 @@ plotCommunitiesStats( |
9 | 9 |
outputFolder = NULL, |
10 | 10 |
basenameOutput = NULL, |
11 | 11 |
display = "byRank", |
12 |
- communities = seq_len(10), |
|
12 |
+ communities = seq_len(5), |
|
13 | 13 |
topnGenes = 20, |
14 | 14 |
topnTFs = 20, |
15 | 15 |
plotAsPDF = TRUE, |
... | ... |
@@ -30,9 +30,9 @@ plotCommunitiesStats( |
30 | 30 |
|
31 | 31 |
\item{communities}{Numeric vector. Default \code{seq_len(10)}. Depending on what was specified in the \code{display} parameter, this parameter would indicate either the rank or the label of the communities to be plotted. i.e. for \code{communities = c(1,4)}, if \code{display = "byRank"} the results for the first and fourth largest communities will be plotted. if \code{display = "byLabel"}, the results for the communities labeled \code{"1"}, and \code{"4"} will be plotted. If set to \code{NULL}, all communities will be plotted} |
32 | 32 |
|
33 |
-\item{topnGenes}{Integer. Default 20. Number of genes to plot, sorted by their rank or label.} |
|
33 |
+\item{topnGenes}{Integer > 0. Default 20. Number of genes to plot, sorted by their rank or label.} |
|
34 | 34 |
|
35 |
-\item{topnTFs}{Integer. Default 20. Number of TFs to plot, sorted by their rank or label.} |
|
35 |
+\item{topnTFs}{Integer > 0. Default 20. Number of TFs to plot, sorted by their rank or label.} |
|
36 | 36 |
|
37 | 37 |
\item{plotAsPDF}{\code{TRUE} or \code{FALSE}. Default \code{TRUE}.Should the plots be printed to a PDF file? If set to \code{TRUE}, a PDF file is generated, the name of which depends on the value of \code{basenameOutput}. If set to \code{FALSE}, all plots are printed to the currently active device. Note that most functions print more than one plot, which means you may only see the last plot depending on your active graphics device.} |
38 | 38 |
|