... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: GRaNIE |
2 | 2 |
Title: GRaNIE: Reconstruction cell type specific gene regulatory networks including enhancers using chromatin accessibility and RNA-seq data |
3 |
-Version: 1.1.15 |
|
3 |
+Version: 1.1.16 |
|
4 | 4 |
Encoding: UTF-8 |
5 | 5 |
Authors@R: c(person("Christian", "Arnold", email = |
6 | 6 |
"chrarnold@web.de", role = c("cre","aut")), |
... | ... |
@@ -119,9 +119,9 @@ setMethod("show", |
119 | 119 |
if (!is.null(GRN@data$peaks$consensusPeaks)) { |
120 | 120 |
nPeaks_filt = nPeaks(GRN, filter = TRUE) |
121 | 121 |
nPeaks_all = nPeaks(GRN, filter = FALSE) |
122 |
- cat(" Number of peaks (filtered, all): ", nPeaks_filt, ", ", nPeaks_all, "\n", sep = "") |
|
122 |
+ cat(" # peaks (filtered, all): ", nPeaks_filt, ", ", nPeaks_all, "\n", sep = "") |
|
123 | 123 |
} else { |
124 |
- cat (" Number of peaks: No peak data found.\n") |
|
124 |
+ cat (" # peaks: No peak data found.\n") |
|
125 | 125 |
} |
126 | 126 |
|
127 | 127 |
if (!is.null(GRN@data$RNA$counts_orig)) { |
... | ... |
@@ -135,16 +135,30 @@ setMethod("show", |
135 | 135 |
|
136 | 136 |
} |
137 | 137 |
|
138 |
- cat(" Number of genes (filtered, all): ", nGenes_filt, ", ", nGenes_all, "\n", sep = "") |
|
138 |
+ cat(" # genes (filtered, all): ", nGenes_filt, ", ", nGenes_all, "\n", sep = "") |
|
139 | 139 |
} else { |
140 |
- cat (" Number of genes: No RNA-seq data found.\n") |
|
140 |
+ cat (" # genes: No RNA-seq data found.\n") |
|
141 | 141 |
} |
142 | 142 |
|
143 |
+ if (!is.null(GRN@data$RNA$counts_orig) & !is.null(GRN@data$peaks$consensusPeaks)) { |
|
144 |
+ |
|
145 |
+ cat(" # shared samples: ", nrow(GRN@data$metadata), "\n", sep = "") |
|
146 |
+ } |
|
147 |
+ |
|
148 |
+ if (!is.null(GRN@annotation$TFs)) { |
|
149 |
+ |
|
150 |
+ cat(" # TFs (with expression data): ", nrow(GRN@annotation$TFs), "\n", sep = "") |
|
151 |
+ } |
|
152 |
+ |
|
153 |
+ |
|
143 | 154 |
cat("Parameters:\n") |
144 | 155 |
if (!is.null(GRN@config$metadata$genomeAsembly)) { |
145 | 156 |
cat(" Genome assembly: ", GRN@config$parameters$genomeAssembly, "\n") |
146 | 157 |
} |
147 | 158 |
|
159 |
+ cat(" Output directory: ", GRN@config$directories$outputRoot, "\n") |
|
160 |
+ |
|
161 |
+ |
|
148 | 162 |
cat("Provided metadata:\n") |
149 | 163 |
|
150 | 164 |
for (nameCur in names(GRN@config$metadata)) { |
... | ... |
@@ -203,7 +217,7 @@ setMethod("show", |
203 | 217 |
df = igraph::vertex.attributes(GRN@graph[["TF_gene"]]$graph) |
204 | 218 |
if (!is.null(df) & "community" %in% names(df)) { |
205 | 219 |
communities = df %>% as.data.frame() %>% dplyr::count(.data$community) %>% dplyr::arrange(dplyr::desc(.data$n)) |
206 |
- cat(" Communities, sorted by size (n = Number of nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "") |
|
220 |
+ cat(" Communities, sorted by size (n = # nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "") |
|
207 | 221 |
} else { |
208 | 222 |
cat(" None found\n") |
209 | 223 |
} |
... | ... |
@@ -323,7 +323,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
323 | 323 |
rownames(counts_rna) = counts_rna$ENSEMBL |
324 | 324 |
|
325 | 325 |
|
326 |
- |
|
326 |
+ futile.logger::flog.info("Storing count matrices...") |
|
327 | 327 |
# Store the raw peaks data efficiently as DESeq object only if it contains only integers, otherwise store as normal matrix |
328 | 328 |
|
329 | 329 |
if (isIntegerMatrix(counts_peaks[, GRN@config$sharedSamples])) { |
... | ... |
@@ -331,8 +331,21 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
331 | 331 |
colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"), |
332 | 332 |
design = ~1) |
333 | 333 |
} else { |
334 |
+ |
|
335 |
+ # Store as sparse matrix if enough 0s |
|
336 |
+ rowSample = min(100, nrow(counts_peaks)) |
|
337 |
+ zeroEntries = length(which(counts_peaks[1:rowSample, GRN@config$sharedSamples] %>% unlist(use.names = FALSE) == 0)) |
|
338 |
+ fractionZero = zeroEntries / (rowSample * length(GRN@config$sharedSamples)) |
|
339 |
+ if (fractionZero> 0.1) { |
|
340 |
+ futile.logger::flog.info(paste0("Storing GRN@data$peaks$counts_orig matrix as sparse matrix because fraction of 0s is > 0.1 (", fractionZero, ")")) |
|
341 |
+ GRN@data$peaks$counts_orig = .asSparseMatrix(as.matrix(counts_peaks %>% dplyr::select(-.data$peakID)), |
|
342 |
+ convertNA_to_zero = FALSE, |
|
343 |
+ dimnames = list(counts_peaks$peakID, colnames(GRN@config$sharedSamples))) |
|
344 |
+ } else { |
|
345 |
+ GRN@data$peaks$counts_orig = as.matrix(counts_peaks[, GRN@config$sharedSamples]) |
|
346 |
+ } |
|
334 | 347 |
|
335 |
- GRN@data$peaks$counts_orig = as.matrix(counts_peaks[, GRN@config$sharedSamples]) |
|
348 |
+ |
|
336 | 349 |
} |
337 | 350 |
|
338 | 351 |
if (isIntegerMatrix(counts_rna[, GRN@config$sharedSamples])) { |
... | ... |
@@ -340,17 +353,30 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
340 | 353 |
colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"), |
341 | 354 |
design = ~1) |
342 | 355 |
} else { |
343 |
- GRN@data$RNA$counts_orig = as.matrix(counts_rna[, GRN@config$sharedSamples]) |
|
356 |
+ |
|
357 |
+ |
|
358 |
+ # Store as sparse matrix if enough 0s |
|
359 |
+ rowSample = min(100, nrow(counts_rna)) |
|
360 |
+ zeroEntries = length(which(counts_rna[1:rowSample, GRN@config$sharedSamples] %>% unlist(use.names = FALSE) == 0)) |
|
361 |
+ fractionZero = zeroEntries / (rowSample * length(GRN@config$sharedSamples)) |
|
362 |
+ if (fractionZero> 0.1) { |
|
363 |
+ futile.logger::flog.info(paste0("Storing GRN@data$RNA$counts_orig matrix as sparse matrix because fraction of 0s is > 0.1 (", fractionZero, ")")) |
|
364 |
+ GRN@data$RNA$counts_orig = .asSparseMatrix(as.matrix(counts_rna %>% dplyr::select(-.data$ENSEMBL)), |
|
365 |
+ convertNA_to_zero = FALSE, |
|
366 |
+ dimnames = list(counts_rna$ENSEMBL, colnames(GRN@config$sharedSamples))) |
|
367 |
+ } else { |
|
368 |
+ GRN@data$RNA$counts_orig = as.matrix(counts_rna[, GRN@config$sharedSamples]) |
|
369 |
+ } |
|
344 | 370 |
} |
345 | 371 |
|
346 |
- |
|
372 |
+ futile.logger::flog.info(paste0("Creating consensus peaks and check for overlapping peaks...")) |
|
347 | 373 |
|
348 | 374 |
# Consensus peaks |
349 | 375 |
GRN@data$peaks$consensusPeaks = .createConsensusPeaksDF(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)) |
350 | 376 |
GRN@data$peaks$consensusPeaks = GRN@data$peaks$consensusPeaks[match(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID, GRN@data$peaks$consensusPeaks$peakID), ] |
351 | 377 |
stopifnot(c("chr", "start", "end", "peakID", "isFiltered") %in% colnames(GRN@data$peaks$consensusPeaks)) |
352 | 378 |
|
353 |
- futile.logger::flog.info(paste0("Check for overlapping peaks...")) |
|
379 |
+ |
|
354 | 380 |
|
355 | 381 |
# Assume 0-based exclusive format, see https://arnaudceol.wordpress.com/2014/09/18/chromosome-coordinate-systems-0-based-1-based/ and http://genome.ucsc.edu/FAQ/FAQformat.html#format1 for details |
356 | 382 |
consensus.gr = .constructGRanges(GRN@data$peaks$consensusPeaks, seqlengths = .getChrLengths(GRN@config$parameters$genomeAssembly), GRN@config$parameters$genomeAssembly, zeroBased = TRUE) |
... | ... |
@@ -381,6 +407,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
381 | 407 |
} |
382 | 408 |
|
383 | 409 |
} |
410 |
+ futile.logger::flog.info(paste0("Adding peak and gene annotation...")) |
|
384 | 411 |
|
385 | 412 |
# Add peak annotation once |
386 | 413 |
GRN = .populatePeakAnnotation(GRN) |
... | ... |
@@ -632,7 +659,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
632 | 659 |
futile.logger::flog.info(paste0("Calculate GC-content for peaks. This may take a while")) |
633 | 660 |
start = Sys.time() |
634 | 661 |
genomeAssembly = GRN@config$parameters$genomeAssembly |
635 |
- #TODO: GC content for all peaks |
|
662 |
+ |
|
636 | 663 |
genome = .getGenomeObject(genomeAssembly, type = "BSgenome") |
637 | 664 |
|
638 | 665 |
# Get peaks as GRanges object |
... | ... |
@@ -1351,7 +1378,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1351 | 1378 |
|
1352 | 1379 |
peak_TF_overlapCur.df = .asMatrixFromSparse(GRN@data$TFs$TF_peak_overlap, convertZero_to_NA = FALSE) %>% |
1353 | 1380 |
tibble::as_tibble() %>% |
1354 |
- dplyr::filter(!.data$isFiltered) %>% |
|
1381 |
+ dplyr::filter(!.data$isFiltered) %>% # Works because 1 / 0 is interpreted here as logical and not 1/0 |
|
1355 | 1382 |
dplyr::select(-.data$isFiltered) |
1356 | 1383 |
|
1357 | 1384 |
if (perm > 0 & shuffle) { |
... | ... |
@@ -1401,6 +1428,7 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac |
1401 | 1428 |
counts.df = getCounts(GRN, type = "peaks", norm = FALSE, permuted = FALSE) %>% |
1402 | 1429 |
tibble::as_tibble() |
1403 | 1430 |
|
1431 |
+ # TODO replace by getter |
|
1404 | 1432 |
countsPeaks = .normalizeNewDataWrapper(GRN@data$peaks$counts_orig, normalization = normalization) |
1405 | 1433 |
|
1406 | 1434 |
stopifnot(identical(nrow(countsPeaks), nrow(GRN@data$TFs$TF_peak_overlap))) |
... | ... |
@@ -2044,7 +2072,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2044 | 2072 |
corMethod, |
2045 | 2073 |
whitespacePrefix = " ") |
2046 | 2074 |
|
2047 |
- # TODO:Check here for the TFs BMAL1.0.A |
|
2075 |
+ |
|
2048 | 2076 |
allTF = intersect(colnames(peak_TF_overlap.df), colnames(peaksCor.m)) |
2049 | 2077 |
checkmate::assertIntegerish(length(allTF), lower = 1) |
2050 | 2078 |
|
... | ... |
@@ -2053,7 +2081,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2053 | 2081 |
if (length(allTF) < ncol(peak_TF_overlap.df) | length(allTF) < ncol(peaksCor.m)) { |
2054 | 2082 |
|
2055 | 2083 |
TF_missing = setdiff(colnames(peak_TF_overlap.df), colnames(peaksCor.m)) |
2056 |
- if (length(TF_missing) > 0) futile.logger::flog.info(paste0(" Skip the following ", length(TF_missing), " TF due to missing dataor because they are marked as filtered: ", paste0(TF_missing, collapse = ","))) |
|
2084 |
+ if (length(TF_missing) > 0) futile.logger::flog.info(paste0(" Skip the following ", length(TF_missing), " TF due to missing data or because they are marked as filtered: ", paste0(TF_missing, collapse = ","))) |
|
2057 | 2085 |
} |
2058 | 2086 |
|
2059 | 2087 |
stopifnot(nrow(peaksCor.m) == nrow(peak_TF_overlap.df)) |
... | ... |
@@ -3755,8 +3783,8 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress |
3755 | 3783 |
} |
3756 | 3784 |
|
3757 | 3785 |
} else { |
3758 |
- message =paste0(" Nothing to do, skip.") |
|
3759 |
- .checkAndLogWarningsAndErrors(NULL, message, isWarning = TRUE) |
|
3786 |
+ futile.logger::flog.info(paste0(" Nothing to do, skip.")) |
|
3787 |
+ |
|
3760 | 3788 |
|
3761 | 3789 |
if (addRobustRegression) { |
3762 | 3790 |
res.df = tibble::tribble(~TF.name, ~TF.ENSEMBL, ~gene.ENSEMBL, ~TF_gene.r, ~TF_gene.p_raw, ~TF_gene.p_raw.robust, |
... | ... |
@@ -4277,7 +4305,9 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl |
4277 | 4305 |
#' |
4278 | 4306 |
#' @template GRN |
4279 | 4307 |
#' @param type Character. Either \code{peaks} or \code{rna}. \code{peaks} corresponds to the counts for the open chromatin data, while \code{rna} refers to th RNA-seq counts. If set to \code{rna}, both permuted and non-permuted data can be retrieved, while for \code{peaks}, only the non-permuted one (i.e., 0) can be retrieved. |
4280 |
-#' @param norm Logical. \code{TRUE} or \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned? |
|
4308 |
+#' @param norm Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned? |
|
4309 |
+#' @param asMatrix Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. If set to \code{FALSE}, counts are returned as a data frame with or without an ID column (see \code{includeIDColumn}). If set to \code{TRUE}, counts are returned as a matrix with the ID column as row names. |
|
4310 |
+#' @param includeIDColumn Logical. \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Only relevant if \code{asMatrix = FALSE}. If set to \code{TRUE}, an explicit ID column is returned (no row names). If set to \code{FALSE}, the IDs are in the row names instead. |
|
4281 | 4311 |
#' @template permuted |
4282 | 4312 |
#' @export |
4283 | 4313 |
#' @import tibble |
... | ... |
@@ -4287,7 +4317,8 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl |
4287 | 4317 |
#' counts.df = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE) |
4288 | 4318 |
#' @return Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object. |
4289 | 4319 |
|
4290 |
-getCounts <- function(GRN, type, norm, permuted = FALSE) { |
|
4320 |
+# TODO document and implement two new arguments here |
|
4321 |
+getCounts <- function(GRN, type, norm = FALSE, permuted = FALSE, asMatrix = FALSE, includeIDColumn = TRUE) { |
|
4291 | 4322 |
|
4292 | 4323 |
checkmate::assertClass(GRN, "GRN") |
4293 | 4324 |
GRN = .addFunctionLogToObject(GRN) |
... | ... |
@@ -4307,36 +4338,50 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4307 | 4338 |
result = GRN@data$peaks$counts_norm |
4308 | 4339 |
|
4309 | 4340 |
} else { |
4310 |
- # Handle case when this is null due to already norm. counts as input |
|
4311 |
- countsOrig = GRN@data$peaks$counts_orig |
|
4312 |
- if (checkmate::testClass(countsOrig, "DESeqDataSet")) { |
|
4313 |
- result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("peakID") |
|
4314 |
- } else { |
|
4315 |
- result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame() |
|
4341 |
+ # Handle case when this is null due to already norm. counts as input |
|
4342 |
+ |
|
4343 |
+ # TODO: Unify the annotation column |
|
4344 |
+ classPeaks = class(GRN@data$peaks$counts_orig) |
|
4345 |
+ if ("DESeqDataSet" %in% classPeaks) { |
|
4346 |
+ result = DESeq2::counts(GRN@data$peaks$counts_orig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("peakID") |
|
4347 |
+ } else if ("matrix" %in% classPeaks) { |
|
4348 |
+ result = GRN@data$peaks$counts_orig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame() |
|
4349 |
+ } else if ("dgCMatrix" %in% classPeaks) { |
|
4350 |
+ result = .asMatrixFromSparse(GRN@data$peaks$counts_orig, convertZero_to_NA = FALSE) %>% dplyr::select(-.data$isFiltered) %>% as.data.frame() |
|
4351 |
+ } else { |
|
4352 |
+ message = paste0("Unsupported class for GRN@data$peaks$counts_orig. Contact the authors.") |
|
4353 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
4354 |
+ } |
|
4355 |
+ |
|
4316 | 4356 |
} |
4317 |
- |
|
4318 |
- } |
|
4319 |
- |
|
4357 |
+ |
|
4320 | 4358 |
|
4321 | 4359 |
} else if (type == "rna") { |
4322 | 4360 |
|
4323 | 4361 |
if (norm) { |
4324 |
- result = GRN@data$RNA$counts_norm.l[[permIndex]] |
|
4362 |
+ result = GRN@data$RNA$counts_norm.l[[permIndex]] |
|
4325 | 4363 |
|
4326 | 4364 |
} else { |
4327 |
- |
|
4328 |
- # Handle case when this is null due to already norm. counts as input |
|
4329 |
- countsOrig = GRN@data$RNA$counts_orig |
|
4330 |
- if (checkmate::testClass(countsOrig, "DESeqDataSet")) { |
|
4331 |
- result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("ENSEMBL") |
|
4332 |
- } else { |
|
4333 |
- result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame() |
|
4334 |
- # TODO: isFiltered column? |
|
4335 |
- } |
|
4365 |
+ # TODO: Unify the annotation column |
|
4366 |
+ classRNA = class(GRN@data$RNA$counts_orig) |
|
4367 |
+ if ("DESeqDataSet" %in% classRNA) { |
|
4368 |
+ result = DESeq2::counts(GRN@data$RNA$counts_orig, norm = FALSE) %>% |
|
4369 |
+ as.data.frame() %>% tibble::rownames_to_column("ENSEMBL") |
|
4370 |
+ } else if ("matrix" %in% classRNA) { |
|
4371 |
+ result = GRN@data$RNA$counts_orig %>% |
|
4372 |
+ dplyr::select(-.data$isFiltered) %>% as.data.frame() |
|
4373 |
+ } else if ("dgCMatrix" %in% classRNA) { |
|
4374 |
+ result = .asMatrixFromSparse(GRN@data$RNA$counts_orig, convertZero_to_NA = FALSE) %>% |
|
4375 |
+ dplyr::select(-.data$isFiltered) %>% as.data.frame() %>% tibble::rownames_to_column("ENSEMBL") |
|
4376 |
+ } else { |
|
4377 |
+ message = paste0("Unsupported class for GRN@data$RNA$counts_orig. Contact the authors.") |
|
4378 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
4379 |
+ } |
|
4336 | 4380 |
|
4337 | 4381 |
} |
4338 |
- |
|
4382 |
+ |
|
4339 | 4383 |
} |
4384 |
+ |
|
4340 | 4385 |
} else { # permutation 1 |
4341 | 4386 |
|
4342 | 4387 |
if (type == "peaks") { |
... | ... |
@@ -4540,7 +4585,7 @@ getGRNConnections <- function(GRN, type = "all.filtered", permuted = FALSE, |
4540 | 4585 |
} |
4541 | 4586 |
} else { |
4542 | 4587 |
|
4543 |
- # TODO: Re-create the output folder here nd adjust to the OS-specific path separator, do not rely on what is stored in the object |
|
4588 |
+ # Re-create the output folder here and adjust to the OS-specific path separator, do not rely on what is stored in the object |
|
4544 | 4589 |
if (.Platform$OS.type == "windows") { |
4545 | 4590 |
GRN@config$directories$output_plots = gsub('/', ('\\'), GRN@config$directories$output_plots, fixed = TRUE) |
4546 | 4591 |
} else { |
... | ... |
@@ -627,14 +627,21 @@ |
627 | 627 |
|
628 | 628 |
|
629 | 629 |
isIntegerMatrix <- function(df) { |
630 |
- |
|
631 |
- res = all.equal(unlist(df), as.integer(unlist(df)), check.attributes = FALSE) |
|
632 |
- |
|
633 |
- if (is.logical(res)) { |
|
634 |
- return(TRUE) |
|
630 |
+ |
|
631 |
+ # Quick test first: test only first row. |
|
632 |
+ res = all.equal(unlist(df[1,]), as.integer(unlist(df[1,])), check.attributes = FALSE) |
|
633 |
+ if (!is.logical(res)) { |
|
634 |
+ return(FALSE) |
|
635 | 635 |
} else { |
636 |
- return(FALSE) |
|
636 |
+ res = all.equal(unlist(df), as.integer(unlist(df)), check.attributes = FALSE) |
|
637 |
+ if (is.logical(res)) { |
|
638 |
+ return(TRUE) |
|
639 |
+ } else { |
|
640 |
+ return(FALSE) |
|
641 |
+ } |
|
637 | 642 |
} |
643 |
+ |
|
644 |
+ |
|
638 | 645 |
} |
639 | 646 |
|
640 | 647 |
.findSuitableColumnsToPlot <- function(metadataTable, remove1LevelFactors = TRUE, verbose = TRUE) { |
... | ... |
@@ -10,6 +10,7 @@ |
10 | 10 |
#' @param directed \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should the network be directed? |
11 | 11 |
#' @template forceRerun |
12 | 12 |
#' @export |
13 |
+#' @seealso \code{\link{filterGRNAndConnectGenes}} |
|
13 | 14 |
#' @examples |
14 | 15 |
#' # See the Workflow vignette on the GRaNIE website for examples |
15 | 16 |
#' GRN = loadExampleObject() |
... | ... |
@@ -3340,7 +3340,7 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL |
3340 | 3340 |
|
3341 | 3341 |
#' Visualize a filtered eGRN in a flexible manner. |
3342 | 3342 |
#' |
3343 |
-#' This function can visualize a filtered eGRN in a very flexible manner and requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}. |
|
3343 |
+#' This function can visualize a filtered eGRN in a very flexible manner and requires a \code{\linkS4class{GRN}} object as generated by \code{\link{build_eGRN_graph}}. |
|
3344 | 3344 |
#' |
3345 | 3345 |
#' @template GRN |
3346 | 3346 |
#' @template outputFolder |
... | ... |
@@ -3360,7 +3360,7 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL |
3360 | 3360 |
#' @param vertexLabel_cex Numeric. Default \code{0.4}. Font size (multiplication factor, device-dependent) |
3361 | 3361 |
#' @param vertexLabel_dist Numeric. Default \code{0} vertex. Distance between the label and the vertex. |
3362 | 3362 |
#' @template forceRerun |
3363 |
-#' @seealso \code{\link{filterGRNAndConnectGenes}} |
|
3363 |
+#' @seealso \code{\link{build_eGRN_graph}} |
|
3364 | 3364 |
#' @examples |
3365 | 3365 |
#' GRN = loadExampleObject() |
3366 | 3366 |
#' GRN = visualizeGRN(GRN, maxRowsToPlot = 700, graph = "TF-gene", colorby = "type") |
... | ... |
@@ -3368,7 +3368,8 @@ plotTFEnrichment <- function(GRN, rankType = "degree", n = NULL, TF.names = NULL |
3368 | 3368 |
#' @export |
3369 | 3369 |
visualizeGRN <- function(GRN, outputFolder = NULL, basenameOutput = NULL, plotAsPDF = TRUE, pdf_width = 12, pdf_height = 12, |
3370 | 3370 |
title = NULL, maxRowsToPlot = 500, nCommunitiesMax = 8, graph = "TF-gene" , colorby = "type", layout = "fr", |
3371 |
- vertice_color_TFs = list(h = 10, c = 85, l = c(25, 95)), vertice_color_peaks = list(h = 135, c = 45, l = c(35, 95)), vertice_color_genes = list(h = 260, c = 80, l = c(30, 90)), |
|
3371 |
+ vertice_color_TFs = list(h = 10, c = 85, l = c(25, 95)), vertice_color_peaks = list(h = 135, c = 45, l = c(35, 95)), |
|
3372 |
+ vertice_color_genes = list(h = 260, c = 80, l = c(30, 90)), |
|
3372 | 3373 |
vertexLabel_cex = 0.4, vertexLabel_dist = 0, forceRerun = FALSE |
3373 | 3374 |
) { |
3374 | 3375 |
|
... | ... |
@@ -3404,6 +3405,11 @@ visualizeGRN <- function(GRN, outputFolder = NULL, basenameOutput = NULL, plotA |
3404 | 3405 |
|
3405 | 3406 |
outputFolder = .checkOutputFolder(GRN, outputFolder) |
3406 | 3407 |
|
3408 |
+ if (is.null(GRN@graph$TF_gene) | is.null(GRN@graph$TF_peak_gene)) { |
|
3409 |
+ message = paste0("Could not find information in the graph slot. Make sure you run the function build_eGRN_graph") |
|
3410 |
+ .checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
|
3411 |
+ } |
|
3412 |
+ |
|
3407 | 3413 |
metadata_visualization.l = getBasic_metadata_visualization(GRN) |
3408 | 3414 |
# if (useDefaultMetadata) { |
3409 | 3415 |
# metadata_visualization.l = getBasic_metadata_visualization(GRN) |
... | ... |
@@ -3522,7 +3528,7 @@ visualizeGRN <- function(GRN, outputFolder = NULL, basenameOutput = NULL, plotA |
3522 | 3528 |
colors_categories.l[["TF"]] = c(color_gradient[1], color_gradient[nBins_real]) |
3523 | 3529 |
colors_categories.l[["TF"]] = color_gradient |
3524 | 3530 |
symbols_categories.l[["TF"]] = c(15,NA,15) |
3525 |
- vertice_color_TFs = append(list(metadata_visualization.l[["RNA_expression_TF"]], "HOCOID", "baseMean_log"), vertice_color_TFs) |
|
3531 |
+ vertice_color_TFs = append(list(metadata_visualization.l[["RNA_expression_TF"]], "TF.HOCOID", "baseMean_log"), vertice_color_TFs) |
|
3526 | 3532 |
#} |
3527 | 3533 |
|
3528 | 3534 |
if(graph == "TF-peak-gene"){ |
... | ... |
@@ -9,7 +9,7 @@ Genetic variants associated with diseases often affect non-coding regions, thus |
9 | 9 |
} |
10 | 10 |
\section{Package functions}{ |
11 | 11 |
|
12 |
-See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE/articles/} for all project-related information. |
|
12 |
+See the Vignettes for a workflow example and more generally \url{https://grp-zaugg.embl-community.io/GRaNIE} for all project-related information. |
|
13 | 13 |
} |
14 | 14 |
|
15 | 15 |
\section{GRN object}{ |
... | ... |
@@ -4,16 +4,27 @@ |
4 | 4 |
\alias{getCounts} |
5 | 5 |
\title{Get counts for the various data defined in a \code{\linkS4class{GRN}} object} |
6 | 6 |
\usage{ |
7 |
-getCounts(GRN, type, norm, permuted = FALSE) |
|
7 |
+getCounts( |
|
8 |
+ GRN, |
|
9 |
+ type, |
|
10 |
+ norm = FALSE, |
|
11 |
+ permuted = FALSE, |
|
12 |
+ asMatrix = FALSE, |
|
13 |
+ includeIDColumn = TRUE |
|
14 |
+) |
|
8 | 15 |
} |
9 | 16 |
\arguments{ |
10 | 17 |
\item{GRN}{Object of class \code{\linkS4class{GRN}}} |
11 | 18 |
|
12 | 19 |
\item{type}{Character. Either \code{peaks} or \code{rna}. \code{peaks} corresponds to the counts for the open chromatin data, while \code{rna} refers to th RNA-seq counts. If set to \code{rna}, both permuted and non-permuted data can be retrieved, while for \code{peaks}, only the non-permuted one (i.e., 0) can be retrieved.} |
13 | 20 |
|
14 |
-\item{norm}{Logical. \code{TRUE} or \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?} |
|
21 |
+\item{norm}{Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should original (often raw, but this may not necessarily be the case) or normalized counts be returned?} |
|
15 | 22 |
|
16 | 23 |
\item{permuted}{\code{TRUE} or \code{FALSE}. Default \code{FALSE}. Should the permuted data be taken (\code{TRUE}) or the non-permuted, original one (\code{FALSE})?} |
24 |
+ |
|
25 |
+\item{asMatrix}{Logical. \code{TRUE} or \code{FALSE}. Default \code{FALSE}. If set to \code{FALSE}, counts are returned as a data frame with or without an ID column (see \code{includeIDColumn}). If set to \code{TRUE}, counts are returned as a matrix with the ID column as row names.} |
|
26 |
+ |
|
27 |
+\item{includeIDColumn}{Logical. \code{TRUE} or \code{FALSE}. Default \code{TRUE}. Only relevant if \code{asMatrix = FALSE}. If set to \code{TRUE}, an explicit ID column is returned (no row names). If set to \code{FALSE}, the IDs are in the row names instead.} |
|
17 | 28 |
} |
18 | 29 |
\value{ |
19 | 30 |
Data frame of counts, with the type as indicated by the function parameters. This function does **NOT** return a \code{\linkS4class{GRN}} object. |
... | ... |
@@ -38,9 +38,9 @@ plotCommunitiesEnrichment( |
38 | 38 |
|
39 | 39 |
\item{p}{Numeric. Default 0.05. p-value threshold to determine significance.} |
40 | 40 |
|
41 |
-\item{nSignificant}{Numeric >0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.} |
|
41 |
+\item{nSignificant}{Numeric > 0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.} |
|
42 | 42 |
|
43 |
-\item{nID}{Numeric >0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.} |
|
43 |
+\item{nID}{Numeric > 0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.} |
|
44 | 44 |
|
45 | 45 |
\item{maxWidth_nchar_plot}{Integer (>=10). Default 50. Maximum number of characters for a term before it is truncated.} |
46 | 46 |
|
... | ... |
@@ -29,7 +29,7 @@ plotDiagnosticPlots_TFPeaks( |
29 | 29 |
|
30 | 30 |
\item{dataType}{Character vector. One of, or both of, \code{"real"} or \code{"permuted"}. For which data type, real or permuted data, to produce the diagnostic plots?} |
31 | 31 |
|
32 |
-\item{nTFMax}{\code{NULL} or Integer. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10)} |
|
32 |
+\item{nTFMax}{\code{NULL} or Integer > 0. Default \code{NULL}. Maximum number of TFs to process. Can be used for testing purposes by setting this to a small number i(.e., 10)} |
|
33 | 33 |
|
34 | 34 |
\item{plotAsPDF}{\code{TRUE} or \code{FALSE}. Default \code{TRUE}.Should the plots be printed to a PDF file? If set to \code{TRUE}, a PDF file is generated, the name of which depends on the value of \code{basenameOutput}. If set to \code{FALSE}, all plots are printed to the currently active device. Note that most functions print more than one plot, which means you may only see the last plot depending on your active graphics device.} |
35 | 35 |
|
... | ... |
@@ -37,9 +37,9 @@ plotTFEnrichment( |
37 | 37 |
|
38 | 38 |
\item{p}{Numeric. Default 0.05. p-value threshold to determine significance.} |
39 | 39 |
|
40 |
-\item{nSignificant}{Numeric >0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.} |
|
40 |
+\item{nSignificant}{Numeric > 0. Default 3. Threshold to filter out an ontology term with less than \code{nSignificant} overlapping genes.} |
|
41 | 41 |
|
42 |
-\item{nID}{Numeric >0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.} |
|
42 |
+\item{nID}{Numeric > 0. Default 10. For the reduced summary heatmap, number of top terms to select per community / for the general enrichment.} |
|
43 | 43 |
|
44 | 44 |
\item{display_pAdj}{\code{TRUE} or \code{FALSE}. Default \code{FALSE}. Is the p-value being displayed in the plots the adjusted p-value? This parameter is relevant for KEGG, Disease Ontology, and Reactome enrichments, and does not affect GO enrichments.} |
45 | 45 |
|
... | ... |
@@ -66,12 +66,12 @@ visualizeGRN( |
66 | 66 |
The same \code{\linkS4class{GRN}} object, without modifications. |
67 | 67 |
} |
68 | 68 |
\description{ |
69 |
-This function can visualize a filtered eGRN in a very flexible manner and requires a filtered set of connections in the \code{\linkS4class{GRN}} object as generated by \code{\link{filterGRNAndConnectGenes}}. |
|
69 |
+This function can visualize a filtered eGRN in a very flexible manner and requires a \code{\linkS4class{GRN}} object as generated by \code{\link{build_eGRN_graph}}. |
|
70 | 70 |
} |
71 | 71 |
\examples{ |
72 | 72 |
GRN = loadExampleObject() |
73 | 73 |
GRN = visualizeGRN(GRN, maxRowsToPlot = 700, graph = "TF-gene", colorby = "type") |
74 | 74 |
} |
75 | 75 |
\seealso{ |
76 |
-\code{\link{filterGRNAndConnectGenes}} |
|
76 |
+\code{\link{build_eGRN_graph}} |
|
77 | 77 |
} |
... | ... |
@@ -269,7 +269,7 @@ For more parameter details, see the R help (`?filterData`). |
269 | 269 |
|
270 | 270 |
```{r filterData, echo=TRUE, include=TRUE, eval = TRUE, class.output="scroll-200", results='hold'} |
271 | 271 |
# Chromosomes to keep for peaks. This should be a vector of chromosome names |
272 |
-chrToKeep_peaks = c(paste0("chr", 1:22), "chrX", "chrY") |
|
272 |
+chrToKeep_peaks = c(paste0("chr", 1:22), "chrX") |
|
273 | 273 |
GRN = filterData(GRN, minNormalizedMean_peaks = 5, minNormalizedMeanRNA = 1, chrToKeep_peaks = chrToKeep_peaks, maxSize_peaks = 10000, forceRerun = TRUE) |
274 | 274 |
``` |
275 | 275 |
We can see from the output that no peaks have been filtered due to their size and almost 11,000 have been filtered due to their small mean read counts, which collectively leaves around 64,000 peaks out of 75,000 originally. For the RNA data, almost half of the data has been filtered (16,211 out of around 35,000 genes). |