... | ... |
@@ -2,16 +2,15 @@ |
2 | 2 |
|
3 | 3 |
## Major changes |
4 | 4 |
|
5 |
-- many Documentation and R help updates, Package Details Vignette is online |
|
5 |
+- many Documentation and R help updates, the [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation) is online |
|
6 | 6 |
- The workflow vignette is now improved: better figure resolution, figure aspect ratios are optimized, and a few other changes |
7 | 7 |
- the eGRN graph structure as built by `build_eGRN_graph()` in the `GRaNIE` object is now reset whenever the function `filterGRNAndConnectGenes()` is successfully executed to make sure that enrichment functions etc are not using an outdated graph structure. |
8 | 8 |
- the landing page of the website has been extended and overhauled |
9 |
-- removed some dependency packages and moved others into `Suggests` to lower the installation burden of the package. In addition, removed `topGO` from the `Depends` section (now in `Suggests`) and removed `tidyverse` altogether (before in `Depends`). Detailed explanations when and how the packages listed under `Suggests` are needed can now be found in the new [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation). |
|
10 |
-- new helper function `installSuggestedPackages()` to install all packages listed under `Suggests` |
|
9 |
+- removed some dependency packages and moved others into `Suggests` to lower the installation burden of the package. In addition, removed `topGO` from the `Depends` section (now in `Suggests`) and removed `tidyverse` altogether (before in `Depends`). Detailed explanations when and how the packages listed under `Suggests` are needed can now be found in the new [Package Details Vignette](https://grp-zaugg.embl-community.io/GRaNIE/articles/GRaNIE_packageDetails.html#installation) and are clearly given to the user when executing the respective functions |
|
11 | 10 |
|
12 | 11 |
## Minor changes |
13 | 12 |
|
14 |
-- many small changes in the code, better argument checking, and preparing rigorous unit test inclusion |
|
13 |
+- many small changes in the code, updated argument checking, and preparing rigorous unit test inclusion |
|
15 | 14 |
- internally renaming the (recently changed / renamed) gene type `lncRNA` from `biomaRt` to `lincRNA` to be compatible with older versions of `GRaNIE` |
16 | 15 |
|
17 | 16 |
|
... | ... |
@@ -202,7 +202,7 @@ setMethod("show", |
202 | 202 |
cat(" Communities (TF-gene):\n") |
203 | 203 |
df = igraph::vertex.attributes(GRN@graph[["TF_gene"]]$graph) |
204 | 204 |
if (!is.null(df) & "community" %in% names(df)) { |
205 |
- communities = df %>% as.data.frame() %>% dplyr::count(community) %>% dplyr::arrange(dplyr::desc(.data$n)) |
|
205 |
+ communities = df %>% as.data.frame() %>% dplyr::count(.data$community) %>% dplyr::arrange(dplyr::desc(.data$n)) |
|
206 | 206 |
cat(" Communities, sorted by size (n = Number of nodes): ", paste0(communities$community, " (n=", communities$n, collapse = "), "), ")\n", sep = "") |
207 | 207 |
} else { |
208 | 208 |
cat(" None found\n") |
... | ... |
@@ -265,7 +265,7 @@ nPeaks <- function(GRN, filter = TRUE) { |
265 | 265 |
} |
266 | 266 |
|
267 | 267 |
if (filter) { |
268 |
- nPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered) %>% nrow() |
|
268 |
+ nPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered) %>% nrow() |
|
269 | 269 |
} else { |
270 | 270 |
nPeaks = GRN@data$peaks$consensusPeaks %>% nrow() |
271 | 271 |
} |
... | ... |
@@ -301,7 +301,7 @@ nGenes <- function(GRN, filter = TRUE) { |
301 | 301 |
} |
302 | 302 |
|
303 | 303 |
if (filter) { |
304 |
- nGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% dplyr::filter(!isFiltered) %>% nrow() |
|
304 |
+ nGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% dplyr::filter(!.data$isFiltered) %>% nrow() |
|
305 | 305 |
} else { |
306 | 306 |
nGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% nrow() |
307 | 307 |
} |
... | ... |
@@ -226,7 +226,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
226 | 226 |
if (nDuplicateRows > 0) { |
227 | 227 |
futile.logger::flog.warn(paste0(" Found ", nDuplicateRows, " duplicate rows in RNA-Seq data, consolidating them by summing them up.")) |
228 | 228 |
counts_rna = counts_rna %>% |
229 |
- dplyr::group_by(ENSEMBL) %>% |
|
229 |
+ dplyr::group_by(.data$ENSEMBL) %>% |
|
230 | 230 |
dplyr::summarise_if(is.numeric, sum) |
231 | 231 |
# dplyr::summarise_if(is.numeric, sum, .groups = 'drop') # the .drop caused an error with dplyr 1.0.5 |
232 | 232 |
} |
... | ... |
@@ -243,8 +243,8 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
243 | 243 |
GRN@config$TF_peak_connectionTypes = "expression" |
244 | 244 |
|
245 | 245 |
# Make sure ENSEMBL is the first column |
246 |
- countsRNA.norm.df = dplyr::select(countsRNA.norm.df, ENSEMBL, tidyselect::everything()) |
|
247 |
- countsPeaks.norm.df = dplyr::select(countsPeaks.norm.df, peakID, tidyselect::everything()) |
|
246 |
+ countsRNA.norm.df = dplyr::select(countsRNA.norm.df, .data$ENSEMBL, tidyselect::everything()) |
|
247 |
+ countsPeaks.norm.df = dplyr::select(countsPeaks.norm.df, .data$peakID, tidyselect::everything()) |
|
248 | 248 |
|
249 | 249 |
samples_rna = colnames(countsRNA.norm.df) |
250 | 250 |
samples_peaks = colnames(countsPeaks.norm.df) |
... | ... |
@@ -262,7 +262,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
262 | 262 |
if (!is.null(sampleMetadata)) { |
263 | 263 |
|
264 | 264 |
futile.logger::flog.info("Parsing provided metadata...") |
265 |
- GRN@data$metadata = sampleMetadata %>% dplyr::distinct() %>% tibble::set_tidy_names(syntactic = TRUE, quiet = TRUE) |
|
265 |
+ GRN@data$metadata = sampleMetadata %>% dplyr::distinct() %>% tibble::tibble(.name_repair = "universal") |
|
266 | 266 |
|
267 | 267 |
# Force the first column to be the ID column |
268 | 268 |
if ("sampleID" %in% colnames(GRN@data$metadata)) { |
... | ... |
@@ -288,12 +288,12 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
288 | 288 |
} |
289 | 289 |
|
290 | 290 |
GRN@data$metadata = GRN@data$metadata %>% |
291 |
- dplyr::mutate(has_RNA = sampleID %in% samples_rna, |
|
292 |
- has_peaks = sampleID %in% samples_peaks, |
|
293 |
- has_both = has_RNA & has_peaks |
|
291 |
+ dplyr::mutate(has_RNA = .data$sampleID %in% samples_rna, |
|
292 |
+ has_peaks = .data$sampleID %in% samples_peaks, |
|
293 |
+ has_both = .data$has_RNA & .data$has_peaks |
|
294 | 294 |
) |
295 | 295 |
|
296 |
- GRN@config$sharedSamples = dplyr::filter(GRN@data$metadata, has_both) %>% dplyr::pull(sampleID) %>% as.character() |
|
296 |
+ GRN@config$sharedSamples = dplyr::filter(GRN@data$metadata, .data$has_both) %>% dplyr::pull(.data$sampleID) %>% as.character() |
|
297 | 297 |
|
298 | 298 |
counts_peaks = as.data.frame(counts_peaks) |
299 | 299 |
counts_rna = as.data.frame(counts_rna) |
... | ... |
@@ -306,7 +306,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
306 | 306 |
|
307 | 307 |
if (isIntegerMatrix(counts_peaks[, GRN@config$sharedSamples])) { |
308 | 308 |
GRN@data$peaks$counts_orig = DESeq2::DESeqDataSetFromMatrix(counts_peaks[, GRN@config$sharedSamples], |
309 |
- colData = dplyr::filter(GRN@data$metadata, has_both) %>% tibble::column_to_rownames("sampleID"), |
|
309 |
+ colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"), |
|
310 | 310 |
design = ~1) |
311 | 311 |
} else { |
312 | 312 |
|
... | ... |
@@ -315,7 +315,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
315 | 315 |
|
316 | 316 |
if (isIntegerMatrix(counts_rna[, GRN@config$sharedSamples])) { |
317 | 317 |
GRN@data$RNA$counts_orig = DESeq2::DESeqDataSetFromMatrix(counts_rna[, GRN@config$sharedSamples], |
318 |
- colData = dplyr::filter(GRN@data$metadata, has_both) %>% tibble::column_to_rownames("sampleID"), |
|
318 |
+ colData = dplyr::filter(GRN@data$metadata, .data$has_both) %>% tibble::column_to_rownames("sampleID"), |
|
319 | 319 |
design = ~1) |
320 | 320 |
} else { |
321 | 321 |
GRN@data$RNA$counts_orig = as.matrix(counts_rna[, GRN@config$sharedSamples]) |
... | ... |
@@ -442,15 +442,15 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
442 | 442 |
} |
443 | 443 |
|
444 | 444 |
geneAnnotation %>% |
445 |
- as_tibble() %>% |
|
446 |
- dplyr::filter(stringr::str_length(chromosome_name) <= 5) %>% |
|
447 |
- dplyr::mutate(chromosome_name = paste0("chr", chromosome_name)) %>% |
|
448 |
- dplyr::rename(gene.chr = chromosome_name, gene.start = start_position, gene.end = end_position, |
|
449 |
- gene.strand = strand, gene.ENSEMBL = ensembl_gene_id, gene.type = gene_biotype, gene.name = external_gene_name) %>% |
|
445 |
+ tibble::as_tibble() %>% |
|
446 |
+ dplyr::filter(stringr::str_length(.data$chromosome_name) <= 5) %>% |
|
447 |
+ dplyr::mutate(chromosome_name = paste0("chr", .data$chromosome_name)) %>% |
|
448 |
+ dplyr::rename(gene.chr = .data$chromosome_name, gene.start = .data$start_position, gene.end = .data$end_position, |
|
449 |
+ gene.strand = .data$strand, gene.ENSEMBL = .data$ensembl_gene_id, gene.type = .data$gene_biotype, gene.name = .data$external_gene_name) %>% |
|
450 | 450 |
tidyr::replace_na(list(gene.type = "unknown")) %>% |
451 | 451 |
dplyr::mutate_if(is.character, as.factor) %>% |
452 |
- dplyr::mutate(gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) %>% # there seems to be a name change from lincRNA -> lncRNA, lets change it here |
|
453 |
- dplyr::mutate(gene.strand = factor(gene.strand, levels = c(1,-1), labels = c("+", "-"))) |
|
452 |
+ dplyr::mutate(gene.type = dplyr::recode_factor(.data$gene.type, lncRNA = "lincRNA")) %>% # there seems to be a name change from lincRNA -> lncRNA, lets change it here |
|
453 |
+ dplyr::mutate(gene.strand = factor(.data$gene.strand, levels = c(1,-1), labels = c("+", "-"))) |
|
454 | 454 |
|
455 | 455 |
} |
456 | 456 |
|
... | ... |
@@ -473,7 +473,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
473 | 473 |
|
474 | 474 |
if (!is.null(gene.types)) { |
475 | 475 |
if (! "all" %in% gene.types) { |
476 |
- genes.filt.df = dplyr::filter(genes.filt.df, gene.type %in% gene.types) |
|
476 |
+ genes.filt.df = dplyr::filter(genes.filt.df, .data$gene.type %in% gene.types) |
|
477 | 477 |
} |
478 | 478 |
} |
479 | 479 |
|
... | ... |
@@ -488,7 +488,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
488 | 488 |
|
489 | 489 |
futile.logger::flog.info(paste0(" Calculate statistics for each peak (mean and CV)")) |
490 | 490 |
|
491 |
- countsPeaks.m = as.matrix(dplyr::select(countsPeaks.clean, -peakID)) |
|
491 |
+ countsPeaks.m = as.matrix(dplyr::select(countsPeaks.clean, -.data$peakID)) |
|
492 | 492 |
|
493 | 493 |
rowMeans_peaks = rowMeans(countsPeaks.m) |
494 | 494 |
rowMedians_peaks = matrixStats::rowMedians(countsPeaks.m) |
... | ... |
@@ -509,7 +509,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
509 | 509 |
|
510 | 510 |
futile.logger::flog.info(paste0(" Retrieve peak annotation using ChipSeeker. This may take a while")) |
511 | 511 |
genomeAssembly = GRN@config$parameters$genomeAssembly |
512 |
- consensusPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!isFiltered) |
|
512 |
+ consensusPeaks = GRN@data$peaks$consensusPeaks %>% dplyr::filter(!.data$isFiltered) |
|
513 | 513 |
consensusPeaks.gr = .constructGRanges(consensusPeaks, seqlengths = .getChrLengths(genomeAssembly), genomeAssembly) |
514 | 514 |
|
515 | 515 |
# Add ChIPSeeker anotation |
... | ... |
@@ -538,22 +538,22 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
538 | 538 |
|
539 | 539 |
GRN@annotation$consensusPeaks = dplyr::left_join(GRN@annotation$consensusPeaks, |
540 | 540 |
peaks.annotated.df %>% |
541 |
- dplyr::select(peakID, annotation, tidyselect::starts_with("gene"), -geneId, distanceToTSS, ENSEMBL, SYMBOL, GENENAME) %>% |
|
542 |
- dplyr::mutate(annotation = as.factor(annotation), |
|
543 |
- ENSEMBL = as.factor(ENSEMBL), |
|
544 |
- GENENAME = as.factor(GENENAME), |
|
545 |
- SYMBOL = as.factor(SYMBOL)), |
|
541 |
+ dplyr::select(.data$peakID, .data$annotation, tidyselect::starts_with("gene"), -.data$geneId, .data$distanceToTSS, .data$ENSEMBL, .data$SYMBOL, .data$GENENAME) %>% |
|
542 |
+ dplyr::mutate(annotation = as.factor(.data$annotation), |
|
543 |
+ ENSEMBL = as.factor(.data$ENSEMBL), |
|
544 |
+ GENENAME = as.factor(.data$GENENAME), |
|
545 |
+ SYMBOL = as.factor(.data$SYMBOL)), |
|
546 | 546 |
by = c("peak.ID" = "peakID")) %>% |
547 |
- dplyr::rename(peak.gene.chr = geneChr, |
|
548 |
- peak.gene.start = geneStart, |
|
549 |
- peak.gene.end = geneEnd, |
|
550 |
- peak.gene.length = geneLength, |
|
551 |
- peak.gene.strand = geneStrand, |
|
552 |
- peak.gene.name = GENENAME, |
|
553 |
- peak.gene.distanceToTSS = distanceToTSS, |
|
554 |
- peak.gene.ENSEMBL = ENSEMBL, |
|
555 |
- peak.gene.symbol = SYMBOL, |
|
556 |
- peak.annotation = annotation |
|
547 |
+ dplyr::rename(peak.gene.chr = .data$geneChr, |
|
548 |
+ peak.gene.start = .data$geneStart, |
|
549 |
+ peak.gene.end = .data$geneEnd, |
|
550 |
+ peak.gene.length = .data$geneLength, |
|
551 |
+ peak.gene.strand = .data$geneStrand, |
|
552 |
+ peak.gene.name = .data$GENENAME, |
|
553 |
+ peak.gene.distanceToTSS = .data$distanceToTSS, |
|
554 |
+ peak.gene.ENSEMBL = .data$ENSEMBL, |
|
555 |
+ peak.gene.symbol = .data$SYMBOL, |
|
556 |
+ peak.annotation = .data$annotation |
|
557 | 557 |
) |
558 | 558 |
|
559 | 559 |
|
... | ... |
@@ -575,7 +575,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
575 | 575 |
|
576 | 576 |
countsRNA.clean = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE) |
577 | 577 |
|
578 |
- countsRNA.m = as.matrix(dplyr::select(countsRNA.clean, -ENSEMBL)) |
|
578 |
+ countsRNA.m = as.matrix(dplyr::select(countsRNA.clean, -.data$ENSEMBL)) |
|
579 | 579 |
|
580 | 580 |
rowMeans_rna = rowMeans(countsRNA.m) |
581 | 581 |
rowMedians_rna = matrixStats::rowMedians(countsRNA.m) |
... | ... |
@@ -588,7 +588,7 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
588 | 588 |
gene.median = rowMedians_rna, |
589 | 589 |
gene.CV = CV_rna) %>% |
590 | 590 |
dplyr::full_join(genomeAnnotation.df, by = c("gene.ENSEMBL")) %>% |
591 |
- dplyr::mutate(gene.type = forcats::fct_explicit_na(gene.type, na_level = "unknown/missing")) |
|
591 |
+ dplyr::mutate(gene.type = forcats::fct_explicit_na(.data$gene.type, na_level = "unknown/missing")) |
|
592 | 592 |
|
593 | 593 |
GRN@annotation$genes = metadata_rna |
594 | 594 |
|
... | ... |
@@ -626,24 +626,24 @@ addData <- function(GRN, counts_peaks, normalization_peaks = "DESeq_sizeFactor", |
626 | 626 |
tibble::as_tibble() %>% |
627 | 627 |
dplyr::mutate(length = GenomicRanges::width(query), |
628 | 628 |
peak.ID = query$peakID, |
629 |
- GC_class = cut(`G|C`, breaks = seq(0,1,0.1), include.lowest = TRUE, ordered_result = TRUE)) |
|
629 |
+ GC_class = cut(.data$`G|C`, breaks = seq(0,1,0.1), include.lowest = TRUE, ordered_result = TRUE)) |
|
630 | 630 |
|
631 | 631 |
GC_classes.df = GC_content.df %>% |
632 |
- dplyr::group_by(GC_class) %>% |
|
632 |
+ dplyr::group_by(.data$GC_class) %>% |
|
633 | 633 |
dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(length), peak_width_sd = sd(length)) %>% |
634 | 634 |
dplyr::ungroup() %>% |
635 |
- tidyr::complete(GC_class, fill = list(n = 0)) %>% |
|
635 |
+ tidyr::complete(.data$GC_class, fill = list(n = 0)) %>% |
|
636 | 636 |
dplyr::mutate(n_rel = .data$n / length(query)) |
637 | 637 |
|
638 | 638 |
# TODO: Put where |
639 |
- #ggplot(GC_content.df, aes(GC.class)) + geom_histogram(stat = "count") + theme_bw() |
|
639 |
+ #ggplot2::ggplot(GC_content.df, ggplot2::aes(GC.class)) + geom_histogram(stat = "count") + ggplot2::theme_bw() |
|
640 | 640 |
|
641 |
- #ggplot(GC_classes.df , aes(GC.class, n_rel)) + geom_bar(stat = "identity") + theme_bw() |
|
641 |
+ #ggplot2::ggplot(GC_classes.df , ggplot2::aes(GC.class, n_rel)) + geom_bar(stat = "identity") + ggplot2::theme_bw() |
|
642 | 642 |
|
643 | 643 |
GRN@annotation$consensusPeaks = dplyr::left_join(GRN@annotation$consensusPeaks, GC_content.df, by = "peak.ID") %>% |
644 |
- dplyr::rename( peak.GC.perc = `G|C`, |
|
645 |
- peak.width = length, |
|
646 |
- peak.GC.class = GC_class) |
|
644 |
+ dplyr::rename( peak.GC.perc = .data$`G|C`, |
|
645 |
+ peak.width = .data$length, |
|
646 |
+ peak.GC.class = .data$GC_class) |
|
647 | 647 |
|
648 | 648 |
GRN@stats$peaks = list() |
649 | 649 |
GRN@stats$peaks$GC = GC_classes.df |
... | ... |
@@ -749,7 +749,7 @@ filterData <- function (GRN, |
749 | 749 |
futile.logger::flog.info(paste0(" Number of rows in total: ", nrow(GRN@data$RNA$counts_norm.l[["0"]]))) |
750 | 750 |
for (permutationCur in c(0)) { |
751 | 751 |
permIndex = as.character(permutationCur) |
752 |
- rowMeans = rowMeans(dplyr::select(GRN@data$RNA$counts_norm.l[[permIndex]], -ENSEMBL)) |
|
752 |
+ rowMeans = rowMeans(dplyr::select(GRN@data$RNA$counts_norm.l[[permIndex]], -.data$ENSEMBL)) |
|
753 | 753 |
GRN@data$RNA$counts_norm.l[[permIndex]]$isFiltered = rowMeans < minNormalizedMeanRNA |
754 | 754 |
} |
755 | 755 |
nRowsFlagged = length(which(GRN@data$RNA$counts_norm.l[["0"]]$isFiltered)) |
... | ... |
@@ -816,12 +816,12 @@ filterData <- function (GRN, |
816 | 816 |
if (is.null(maxCV)) { |
817 | 817 |
futile.logger::flog.info(paste0("Filter peaks by CV: Min CV = ", minCV)) |
818 | 818 |
|
819 |
- peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, peak.CV >= minCV) |
|
819 |
+ peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, .data$peak.CV >= minCV) |
|
820 | 820 |
|
821 | 821 |
} else { |
822 | 822 |
futile.logger::flog.info(paste0("Filter peaks by CV: Min CV = ", minCV, ", max CV = ", maxCV)) |
823 | 823 |
|
824 |
- peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, peak.CV >= minCV, peak.CV <= maxCV) |
|
824 |
+ peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, .data$peak.CV >= minCV, .data$peak.CV <= maxCV) |
|
825 | 825 |
} |
826 | 826 |
|
827 | 827 |
futile.logger::flog.info(paste0(" Number of peaks after filtering : ", nrow(peaksFiltered))) |
... | ... |
@@ -865,8 +865,8 @@ filterData <- function (GRN, |
865 | 865 |
|
866 | 866 |
|
867 | 867 |
peaksFiltered = dplyr::filter(GRN@annotation$consensusPeaks, |
868 |
- peak.CV >= minCV, peak.CV <= maxCV, |
|
869 |
- peak.mean >= minMean, peak.mean <= maxMean) |
|
868 |
+ .data$peak.CV >= minCV, .data$peak.CV <= maxCV, |
|
869 |
+ .data$peak.mean >= minMean, .data$peak.mean <= maxMean) |
|
870 | 870 |
|
871 | 871 |
futile.logger::flog.info(paste0(" Number of peaks after filtering : ", nrow(peaksFiltered))) |
872 | 872 |
|
... | ... |
@@ -912,8 +912,8 @@ filterData <- function (GRN, |
912 | 912 |
|
913 | 913 |
|
914 | 914 |
genesFiltered = dplyr::filter(GRN@annotation$genes, |
915 |
- gene.CV >= minCV, gene.CV <= maxCV, |
|
916 |
- gene.mean >= minMean, gene.mean <= maxMean) |
|
915 |
+ .data$gene.CV >= minCV, .data$gene.CV <= maxCV, |
|
916 |
+ .data$gene.mean >= minMean, .data$gene.mean <= maxMean) |
|
917 | 917 |
|
918 | 918 |
|
919 | 919 |
futile.logger::flog.info(paste0(" Number of genes after filtering : ", nrow(genesFiltered))) |
... | ... |
@@ -962,7 +962,7 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte |
962 | 962 |
|
963 | 963 |
GRN@data$TFs$translationTable = GRN@data$TFs$translationTable %>% |
964 | 964 |
dplyr::select(c("HOCOID", "ENSEMBL")) %>% |
965 |
- dplyr::mutate(TF.name = HOCOID, TF.ENSEMBL = ENSEMBL) |
|
965 |
+ dplyr::mutate(TF.name = .data$HOCOID, TF.ENSEMBL = .data$ENSEMBL) |
|
966 | 966 |
|
967 | 967 |
# TODO: Change here and make it more logical what to put where |
968 | 968 |
GRN@config$allTF = GRN@data$TFs$translationTable$TF.name |
... | ... |
@@ -1013,14 +1013,16 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte |
1013 | 1013 |
file_input_HOCOMOCO = paste0(folder_input_TFBS, .Platform$file.sep, "translationTable.csv") |
1014 | 1014 |
HOCOMOCO_mapping.df = .readHOCOMOCOTable(file_input_HOCOMOCO, delim = " ") |
1015 | 1015 |
|
1016 |
- TF_notExpressed = sort(dplyr::filter(HOCOMOCO_mapping.df, ! ENSEMBL %in% countsRNA$ENSEMBL, HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(HOCOID)) |
|
1016 |
+ TF_notExpressed = sort(dplyr::filter(HOCOMOCO_mapping.df, ! .data$ENSEMBL %in% countsRNA$ENSEMBL, .data$HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(.data$HOCOID)) |
|
1017 | 1017 |
|
1018 | 1018 |
if (length(TF_notExpressed) > 0) { |
1019 | 1019 |
futile.logger::flog.info(paste0("Filtering the following ", length(TF_notExpressed), " TFs as they are not present in the RNA-Seq data: ", paste0(TF_notExpressed, collapse = ","))) |
1020 | 1020 |
|
1021 | 1021 |
} |
1022 | 1022 |
|
1023 |
- allTF = sort(dplyr::filter(HOCOMOCO_mapping.df, ENSEMBL %in% countsRNA$ENSEMBL, HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(HOCOID)) |
|
1023 |
+ allTF = sort(dplyr::filter(HOCOMOCO_mapping.df, |
|
1024 |
+ .data$ENSEMBL %in% countsRNA$ENSEMBL, |
|
1025 |
+ .data$HOCOID %in% TFsWithTFBSPredictions) %>% dplyr::pull(.data$HOCOID)) |
|
1024 | 1026 |
|
1025 | 1027 |
nTF = length(allTF) |
1026 | 1028 |
if (nTF == 0) { |
... | ... |
@@ -1041,7 +1043,7 @@ addTFBS <- function(GRN, motifFolder, TFs = "all", nTFMax = NULL, filesTFBSPatte |
1041 | 1043 |
nTF = length(allTF) |
1042 | 1044 |
futile.logger::flog.info(paste0("Running the pipeline for ", nTF, " TF in total.")) |
1043 | 1045 |
|
1044 |
- HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df, HOCOID %in% allTF) |
|
1046 |
+ HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df, .data$HOCOID %in% allTF) |
|
1045 | 1047 |
if (nrow(HOCOMOCO_mapping.df.exp) == 0) { |
1046 | 1048 |
message = paste0("Number of rows of HOCOMOCO_mapping.df.exp is 0. Something is wrong with the mapping table or the filtering") |
1047 | 1049 |
.checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
... | ... |
@@ -1125,13 +1127,13 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1125 | 1127 |
} |
1126 | 1128 |
|
1127 | 1129 |
# new |
1128 |
- filteredPeaks = dplyr::filter(GRN@data$peaks$counts_norm, isFiltered) %>% dplyr::pull(peakID) |
|
1130 |
+ filteredPeaks = dplyr::filter(GRN@data$peaks$counts_norm, .data$isFiltered) %>% dplyr::pull(.data$peakID) |
|
1129 | 1131 |
# Collect binary 0/1 binding matrix from all TF and concatenate |
1130 | 1132 |
GRN@data$TFs$TF_peak_overlap = TFBS_bindingMatrix.df %>% |
1131 | 1133 |
dplyr::mutate(peakID = GRN@data$peaks$consensusPeaks$peakID, |
1132 |
- isFiltered = peakID %in% filteredPeaks) %>% |
|
1134 |
+ isFiltered = .data$peakID %in% filteredPeaks) %>% |
|
1133 | 1135 |
dplyr::mutate_if(is.logical, as.numeric) %>% |
1134 |
- dplyr::select(tidyselect::all_of(sort(GRN@config$allTF)), isFiltered) |
|
1136 |
+ dplyr::select(tidyselect::all_of(sort(GRN@config$allTF)), .data$isFiltered) |
|
1135 | 1137 |
|
1136 | 1138 |
GRN@data$TFs$TF_peak_overlap = .asSparseMatrix(as.matrix(GRN@data$TFs$TF_peak_overlap), |
1137 | 1139 |
convertNA_to_zero = FALSE, |
... | ... |
@@ -1186,13 +1188,13 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1186 | 1188 |
query_overlap_df$peak_end = end(query.gr) [query_row_ids] |
1187 | 1189 |
|
1188 | 1190 |
final.df = cbind.data.frame(query_overlap_df, subject_overlap_df) %>% |
1189 |
- dplyr::select(-score, -annotation) %>% |
|
1190 |
- dplyr::mutate(tfbsID = paste0(tfbs_chr, ":", tfbs_start, "-", tfbs_end), |
|
1191 |
- coordCentTfbs = round((tfbs_start + tfbs_end)/2, 0), |
|
1192 |
- coordSummit = round((peak_start + peak_end)/2, 0), |
|
1193 |
- distance = abs(coordSummit - coordCentTfbs)) %>% |
|
1194 |
- dplyr::group_by(peakID) %>% |
|
1195 |
- dplyr::slice(which.min(distance)) %>% |
|
1191 |
+ dplyr::select(-.data$score, -.data$annotation) %>% |
|
1192 |
+ dplyr::mutate(tfbsID = paste0(.data$tfbs_chr, ":", .data$tfbs_start, "-", .data$tfbs_end), |
|
1193 |
+ coordCentTfbs = round((.data$tfbs_start + .data$tfbs_end)/2, 0), |
|
1194 |
+ coordSummit = round((.data$peak_start + .data$peak_end)/2, 0), |
|
1195 |
+ distance = abs(.data$coordSummit - .data$coordCentTfbs)) %>% |
|
1196 |
+ dplyr::group_by(.data$peakID) %>% |
|
1197 |
+ dplyr::slice(which.min(.data$distance)) %>% |
|
1196 | 1198 |
#arrange(distance, .by_group = TRUE) %>% |
1197 | 1199 |
# top_n(n = 2, dplyr::desc(distance)) %>% |
1198 | 1200 |
dplyr::ungroup() |
... | ... |
@@ -1236,10 +1238,10 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1236 | 1238 |
# Filter to only the TFs |
1237 | 1239 |
# In addition, the no of TF because multiple TFs can map to the same gene/ ENSEMBL ID |
1238 | 1240 |
# Also filter 0 count genes because they otherwise cause errors downstream |
1239 |
- rowSums = rowSums(dplyr::select(matrix1, -ENSEMBL)) |
|
1241 |
+ rowSums = rowSums(dplyr::select(matrix1, -.data$ENSEMBL)) |
|
1240 | 1242 |
|
1241 | 1243 |
# Keep only Ensembl IDs from TFs we have data from |
1242 |
- matrix1.norm.TFs.df = dplyr::filter(matrix1, ENSEMBL %in% HOCOMOCO_mapping$ENSEMBL, rowSums != 0) |
|
1244 |
+ matrix1.norm.TFs.df = dplyr::filter(matrix1, .data$ENSEMBL %in% HOCOMOCO_mapping$ENSEMBL, rowSums != 0) |
|
1243 | 1245 |
|
1244 | 1246 |
diff = nrow(matrix1) - nrow(matrix1.norm.TFs.df) |
1245 | 1247 |
if (diff > 0) { |
... | ... |
@@ -1252,7 +1254,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1252 | 1254 |
.checkAndLogWarningsAndErrors(NULL, message, isWarning = FALSE) |
1253 | 1255 |
} |
1254 | 1256 |
|
1255 |
- HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL) |
|
1257 |
+ HOCOMOCO_mapping.exp = dplyr::filter(HOCOMOCO_mapping, .data$ENSEMBL %in% matrix1.norm.TFs.df$ENSEMBL) |
|
1256 | 1258 |
futile.logger::flog.info(paste0(whitespacePrefix, "Correlate TF/gene data for ", nrow(matrix1.norm.TFs.df), " unique Ensembl IDs (TFs) and peak counts for ", nrow(matrix_peaks), " peaks.")) |
1257 | 1259 |
futile.logger::flog.info(paste0(whitespacePrefix, "Note: For subsequent steps, the same gene may be associated with multiple TF, depending on the translation table.")) |
1258 | 1260 |
# Correlate TF gene counts with peak counts |
... | ... |
@@ -1263,7 +1265,7 @@ overlapPeaksAndTFBS <- function(GRN, nCores = 2, forceRerun = FALSE) { |
1263 | 1265 |
# counts for peaks may be 0 throughout, then a warning is thrown |
1264 | 1266 |
|
1265 | 1267 |
#If the sd is zero, a warning is issued. We suppress it here to not confuse users as this is being dealt with later by ignoring the NA entries |
1266 |
- cor.m = suppressWarnings(t(cor(t(dplyr::select(matrix1.norm.TFs.df, -ENSEMBL)), t(dplyr::select(matrix_peaks, -peakID)), method = corMethod))) |
|
1268 |
+ cor.m = suppressWarnings(t(cor(t(dplyr::select(matrix1.norm.TFs.df, -.data$ENSEMBL)), t(dplyr::select(matrix_peaks, -.data$peakID)), method = corMethod))) |
|
1267 | 1269 |
|
1268 | 1270 |
colnames(cor.m) = matrix1.norm.TFs.df$ENSEMBL |
1269 | 1271 |
rownames(cor.m) = matrix_peaks$peakID |
... | ... |
@@ -1384,7 +1386,7 @@ addData_TFActivity <- function(GRN, normalization = "cyclicLoess", name = "TF_ac |
1384 | 1386 |
tibble::rownames_to_column("TF.name") %>% |
1385 | 1387 |
tibble::as_tibble() %>% |
1386 | 1388 |
dplyr::left_join(GRN@data$TFs$translationTable, by = "TF.name") %>% |
1387 |
- dplyr::select(ENSEMBL, TF.name, tidyselect::all_of(GRN@config$sharedSamples)) |
|
1389 |
+ dplyr::select(.data$ENSEMBL, .data$TF.name, tidyselect::all_of(GRN@config$sharedSamples)) |
|
1388 | 1390 |
|
1389 | 1391 |
# Update available connection types |
1390 | 1392 |
GRN@config$TF_peak_connectionTypes = unique(c(GRN@config$TF_peak_connectionTypes, name)) |
... | ... |
@@ -1542,7 +1544,7 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF |
1542 | 1544 |
# data = dplyr::select(data, -tidyselect::one_of(nameColumn)) |
1543 | 1545 |
|
1544 | 1546 |
# Only TF.names are unique |
1545 |
- countsNorm = .normalizeNewDataWrapper(data %>% dplyr::select(-ENSEMBL), normalization = normalization, idColumn = "TF.name") |
|
1547 |
+ countsNorm = .normalizeNewDataWrapper(data %>% dplyr::select(-.data$ENSEMBL), normalization = normalization, idColumn = "TF.name") |
|
1546 | 1548 |
|
1547 | 1549 |
# Check overlap of ENSEMBL IDs |
1548 | 1550 |
countsNorm$ENSEMBL = data$ENSEMBL |
... | ... |
@@ -1578,7 +1580,7 @@ importTFData <- function(GRN, data, name, idColumn = "ENSEMBL", nameColumn = "TF |
1578 | 1580 |
# Therefore, use TF names as row names, same as with the TF Activity matrix |
1579 | 1581 |
|
1580 | 1582 |
GRN@data$TFs[[name]] = countsNorm.subset %>% |
1581 |
- dplyr::select(ENSEMBL, TF.name, tidyselect::all_of(GRN@config$sharedSamples)) %>% |
|
1583 |
+ dplyr::select(.data$ENSEMBL, .data$TF.name, tidyselect::all_of(GRN@config$sharedSamples)) %>% |
|
1582 | 1584 |
tibble::as_tibble() |
1583 | 1585 |
|
1584 | 1586 |
# Update available connection types |
... | ... |
@@ -1636,7 +1638,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1636 | 1638 |
outputFolder = .checkOutputFolder(GRN, outputFolder) |
1637 | 1639 |
|
1638 | 1640 |
GRN@data$TFs$classification$TF.translation.orig = GRN@data$TFs$translationTable %>% |
1639 |
- dplyr::mutate(TF.name = HOCOID) |
|
1641 |
+ dplyr::mutate(TF.name = .data$HOCOID) |
|
1640 | 1642 |
|
1641 | 1643 |
if (is.null(GRN@data$TFs$TF_peak_overlap)) { |
1642 | 1644 |
message = paste0("Could not find peak - TF matrix. Run the function overlapPeaksAndTFBS first / again") |
... | ... |
@@ -1683,7 +1685,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1683 | 1685 |
|
1684 | 1686 |
# TF activity data |
1685 | 1687 |
counts1 = GRN@data$TFs[[connectionTypeCur]] %>% |
1686 |
- dplyr::select(-TF.name) |
|
1688 |
+ dplyr::select(-.data$TF.name) |
|
1687 | 1689 |
|
1688 | 1690 |
} |
1689 | 1691 |
|
... | ... |
@@ -1716,7 +1718,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1716 | 1718 |
|
1717 | 1719 |
GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$TF.classification = |
1718 | 1720 |
.finalizeClassificationAndAppend( |
1719 |
- output.global.TFs = GRN@data$TFs$classification$TF.translation.orig %>% dplyr::mutate(TF = TF.name), |
|
1721 |
+ output.global.TFs = GRN@data$TFs$classification$TF.translation.orig %>% dplyr::mutate(TF = .data$TF.name), |
|
1720 | 1722 |
median.cor.tfs = GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$TF_cor_median_foreground, |
1721 | 1723 |
act.rep.thres.l = GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$act.rep.thres.l, |
1722 | 1724 |
par.l = GRN@config$parameters, |
... | ... |
@@ -1764,7 +1766,7 @@ AR_classification_wrapper<- function (GRN, significanceThreshold_Wilcoxon = 0.05 |
1764 | 1766 |
TF_peak_cor = GRN@data$TFs$classification[[permIndex]] [[connectionTypeCur]]$TF_peak_cor |
1765 | 1767 |
peak_TF_overlapCur.df = .filterSortAndShuffle_peakTF_overlapTable(GRN, permutationCur, TF_peak_cor) |
1766 | 1768 |
.plot_heatmapAR(TF.peakMatrix.df = peak_TF_overlapCur.df, |
1767 |
- HOCOMOCO_mapping.df.exp = GRN@data$TFs$translationTable %>% dplyr::mutate(TF = TF.name), |
|
1769 |
+ HOCOMOCO_mapping.df.exp = GRN@data$TFs$translationTable %>% dplyr::mutate(TF = .data$TF.name), |
|
1768 | 1770 |
sort.cor.m = TF_peak_cor, |
1769 | 1771 |
par.l = GRN@config$parameters, |
1770 | 1772 |
corMethod = corMethod, |
... | ... |
@@ -1968,7 +1970,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
1968 | 1970 |
|
1969 | 1971 |
# Keep only Ensembl ID here |
1970 | 1972 |
counts_connectionTypeCur = GRN@data$TFs[[connectionTypeCur]] %>% |
1971 |
- dplyr::select(-TF.name) |
|
1973 |
+ dplyr::select(-.data$TF.name) |
|
1972 | 1974 |
|
1973 | 1975 |
} |
1974 | 1976 |
|
... | ... |
@@ -2040,18 +2042,18 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2040 | 2042 |
|
2041 | 2043 |
# Get GC info from those peaks from the foreground |
2042 | 2044 |
GC_classes_foreground.df = peaksForeground %>% |
2043 |
- dplyr::group_by(GC_class) %>% |
|
2044 |
- dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(peak_width), peak_width_sd = sd(peak_width)) %>% |
|
2045 |
+ dplyr::group_by(.data$GC_class) %>% |
|
2046 |
+ dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(.data$peak_width), peak_width_sd = sd(.data$peak_width)) %>% |
|
2045 | 2047 |
dplyr::ungroup() %>% |
2046 |
- tidyr::complete(GC_class, fill = list(n = 0)) %>% |
|
2048 |
+ tidyr::complete(.data$GC_class, fill = list(n = 0)) %>% |
|
2047 | 2049 |
dplyr::mutate(n_rel = .data$n / nPeaksForeground, type = "foreground") %>% |
2048 |
- dplyr::arrange(dplyr::desc(n_rel)) |
|
2050 |
+ dplyr::arrange(dplyr::desc(.data$n_rel)) |
|
2049 | 2051 |
|
2050 | 2052 |
GC_classes_background.df = peaksBackground %>% |
2051 |
- dplyr::group_by(GC_class) %>% |
|
2052 |
- dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(peak_width), peak_width_sd = sd(peak_width)) %>% |
|
2053 |
+ dplyr::group_by(.data$GC_class) %>% |
|
2054 |
+ dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(.data$peak_width), peak_width_sd = sd(.data$peak_width)) %>% |
|
2053 | 2055 |
dplyr::ungroup() %>% |
2054 |
- tidyr::complete(GC_class, fill = list(n = 0)) %>% |
|
2056 |
+ tidyr::complete(.data$GC_class, fill = list(n = 0)) %>% |
|
2055 | 2057 |
dplyr::mutate(n_rel = .data$n / nPeaksBackground, type = "background_orig") |
2056 | 2058 |
|
2057 | 2059 |
|
... | ... |
@@ -2062,9 +2064,6 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2062 | 2064 |
minPerc = percBackground_size |
2063 | 2065 |
} else { |
2064 | 2066 |
|
2065 |
- # Find a matching background of maximal size |
|
2066 |
- minPerc= min(GC_class.all$maxSizeBackground) / nPeaksBackground |
|
2067 |
- |
|
2068 | 2067 |
threshold_percentage = 0.05 |
2069 | 2068 |
minPerc = .findMaxBackgroundSize(GC_classes_foreground.df, GC_classes_background.df, peaksBackground, threshold_percentage = threshold_percentage) |
2070 | 2069 |
|
... | ... |
@@ -2099,9 +2098,9 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2099 | 2098 |
# Add sel. minPerc to table and calculate required frequencies |
2100 | 2099 |
|
2101 | 2100 |
GC_classes_all.df = dplyr::full_join(GC_classes_foreground.df, GC_classes_background.df, suffix = c(".fg",".bg"), by = "GC_class") %>% |
2102 |
- dplyr::mutate(maxSizeBackground = n.bg / n_rel.fg, |
|
2103 |
- n.bg.needed = floor(n_rel.fg * targetNoPeaks), |
|
2104 |
- n.bg.needed.perc = n.bg / n.bg.needed) |
|
2101 |
+ dplyr::mutate(maxSizeBackground = .data$n.bg / .data$n_rel.fg, |
|
2102 |
+ n.bg.needed = floor(.data$n_rel.fg * targetNoPeaks), |
|
2103 |
+ n.bg.needed.perc = .data$n.bg / .data$n.bg.needed) |
|
2105 | 2104 |
|
2106 | 2105 |
|
2107 | 2106 |
#futile.logger::flog.info(paste0( " GC-adjustment: Randomly select a total of ", round(targetNoPeaks,0), |
... | ... |
@@ -2127,9 +2126,9 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2127 | 2126 |
} |
2128 | 2127 |
|
2129 | 2128 |
if (GC_classes_all.df$n.bg.needed[i] > nrow(peaksBackgroundGCCur)) { |
2130 |
- peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = percBackground_resample) %>% dplyr::pull(peakID)) |
|
2129 |
+ peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = percBackground_resample) %>% dplyr::pull(.data$peakID)) |
|
2131 | 2130 |
} else { |
2132 |
- peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = FALSE) %>% dplyr::pull(peakID)) |
|
2131 |
+ peakIDsSel = c(peakIDsSel, peaksBackgroundGCCur %>% dplyr::sample_n(nPeaksCur, replace = FALSE) %>% dplyr::pull(.data$peakID)) |
|
2133 | 2132 |
} |
2134 | 2133 |
# Take a sample from the background, and record the row numbers |
2135 | 2134 |
|
... | ... |
@@ -2246,7 +2245,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2246 | 2245 |
# |
2247 | 2246 |
TF_peak.fdr_direction = directionCur, |
2248 | 2247 |
TF_peak.r_bin = |
2249 |
- as.character(cut(TF_peak.r_bin2, breaks = stepsCur, right = rightOpen, include.lowest = TRUE)) |
|
2248 |
+ as.character(cut(.data$TF_peak.r_bin2, breaks = stepsCur, right = rightOpen, include.lowest = TRUE)) |
|
2250 | 2249 |
) |
2251 | 2250 |
|
2252 | 2251 |
# Derive connection summaries for all TF for both directions |
... | ... |
@@ -2262,7 +2261,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2262 | 2261 |
} |
2263 | 2262 |
|
2264 | 2263 |
connectionStats_all.l[[indexStr]] = cor.peak.tf %>% |
2265 |
- dplyr::group_by(TF_peak.r_bin) %>% |
|
2264 |
+ dplyr::group_by(.data$TF_peak.r_bin) %>% |
|
2266 | 2265 |
dplyr::summarise(n = dplyr::n()) %>% |
2267 | 2266 |
dplyr::ungroup() %>% |
2268 | 2267 |
dplyr::right_join(fdr.curve, by = "TF_peak.r_bin") %>% |
... | ... |
@@ -2270,7 +2269,7 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2270 | 2269 |
TF.name = as.factor(TFCur), |
2271 | 2270 |
TF_peak.connectionType = factor(connectionTypeCur, levels = connectionTypes), |
2272 | 2271 |
TF_peak.fdr_direction = factor(directionCur, levels = c("pos", "neg")), |
2273 |
- TF_peak.r_bin = factor(TF_peak.r_bin, levels = levelsBins), |
|
2272 |
+ TF_peak.r_bin = factor(.data$TF_peak.r_bin, levels = levelsBins), |
|
2274 | 2273 |
|
2275 | 2274 |
# Collect extra information, currently however a bit repetitively stored |
2276 | 2275 |
nForeground = nPeaksForeground, |
... | ... |
@@ -2278,15 +2277,15 @@ addConnections_TF_peak <- function (GRN, plotDiagnosticPlots = TRUE, plotDetails |
2278 | 2277 |
nBackground_orig = nPeaksBackground, |
2279 | 2278 |
percBackgroundUsed = minPerc, |
2280 | 2279 |
background_match_success = background_match_success) %>% |
2281 |
- dplyr::select(TF.name, TF_peak.r_bin, |
|
2282 |
- .data$n, tpvalue, fpvalue, fpvalue_norm, |
|
2283 |
- TF_peak.fdr, |
|
2284 |
- TF_peak.fdr_orig, TF_peak.fdr_direction, |
|
2285 |
- TF_peak.connectionType, |
|
2280 |
+ dplyr::select(.data$TF.name, .data$TF_peak.r_bin, |
|
2281 |
+ .data$n, .data$tpvalue, .data$fpvalue, .data$fpvalue_norm, |
|
2282 |
+ .data$TF_peak.fdr, |
|
2283 |
+ .data$TF_peak.fdr_orig, .data$TF_peak.fdr_direction, |
|
2284 |
+ .data$TF_peak.connectionType, |
|
2286 | 2285 |
tidyselect::contains("ground")) %>% |
2287 |
- dplyr::rename(n_tp = tpvalue, n_fp = fpvalue, n_fp_norm = fpvalue_norm) %>% |
|
2286 |
+ dplyr::rename(n_tp = .data$tpvalue, n_fp = .data$fpvalue, n_fp_norm = .data$fpvalue_norm) %>% |
|
2288 | 2287 |
dplyr::distinct() %>% |
2289 |
- dplyr::arrange(TF_peak.r_bin) |
|
2288 |
+ dplyr::arrange(.data$TF_peak.r_bin) |
|
2290 | 2289 |
|
2291 | 2290 |
|
2292 | 2291 |
# Collect data for additional QC plots before they are filtered |
... | ... |
@@ -2498,7 +2497,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2498 | 2497 |
|
2499 | 2498 |
} |
2500 | 2499 |
|
2501 |
-.calculatePeakGeneOverlaps <- function(GRN, allPeaks, peak_TAD_mapping = NULL, par.l, neighborhoodSize = 250000, genomeAssembly, |
|
2500 |
+.calculatePeakGeneOverlaps <- function(GRN, allPeaks, peak_TAD_mapping = NULL, neighborhoodSize = 250000, genomeAssembly, |
|
2502 | 2501 |
gene.types, overlapTypeGene = "TSS", removeEnsemblNA = TRUE) { |
2503 | 2502 |
|
2504 | 2503 |
start = Sys.time() |
... | ... |
@@ -2514,10 +2513,10 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2514 | 2513 |
|
2515 | 2514 |
futile.logger::flog.info(paste0(" For peaks overlapping multiple TADs, use the union of all to define the neighborhood")) |
2516 | 2515 |
peak_TAD_mapping = peak_TAD_mapping %>% |
2517 |
- dplyr::group_by(peakID) %>% |
|
2518 |
- dplyr::mutate(tadStart2 = min(tadStart), # If a peak overlaps multiple TADs, merge them |
|
2519 |
- tadEnd2 = max(tadEnd), # If a peak overlaps multiple TADs, merge them |
|
2520 |
- tad.ID_all = paste0(tad.ID, collapse = "|")) %>% |
|
2516 |
+ dplyr::group_by(.data$peakID) %>% |
|
2517 |
+ dplyr::mutate(tadStart2 = min(.data$tadStart), # If a peak overlaps multiple TADs, merge them |
|
2518 |
+ tadEnd2 = max(.data$tadEnd), # If a peak overlaps multiple TADs, merge them |
|
2519 |
+ tad.ID_all = paste0(.data$tad.ID, collapse = "|")) %>% |
|
2521 | 2520 |
dplyr::slice(1) %>% |
2522 | 2521 |
dplyr::ungroup() |
2523 | 2522 |
|
... | ... |
@@ -2722,7 +2721,7 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2722 | 2721 |
nPeaksWithOutTAD = length(which(is.na(peak.TADs.df$tad.ID))) |
2723 | 2722 |
futile.logger::flog.info(paste0(" Out of the ", nPeaks, " peaks, ", nPeaksWithOutTAD, " peaks are not within a TAD domain. These will be ignored for subsequent overlaps")) |
2724 | 2723 |
|
2725 |
- nPeaksWithMultipleTADs = peak.TADs.df %>% dplyr::group_by(peakID) %>% dplyr::summarize(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow() |
|
2724 |
+ nPeaksWithMultipleTADs = peak.TADs.df %>% dplyr::group_by(.data$peakID) %>% dplyr::summarize(n = dplyr::n()) %>% dplyr::filter(.data$n > 1) %>% nrow() |
|
2726 | 2725 |
|
2727 | 2726 |
if (nPeaksWithMultipleTADs > 0) { |
2728 | 2727 |
futile.logger::flog.info(paste0(" Out of the ", nPeaks, " peaks, ", nPeaksWithMultipleTADs, " overlap with more than one TAD. This usually means they are crossing TAD borders.")) |
... | ... |
@@ -2736,12 +2735,14 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2736 | 2735 |
# if a peak overlaps with a gene, should the same gene be reported as the connection? |
2737 | 2736 |
|
2738 | 2737 |
# OVERLAP OF PEAKS AND EXTENDED GENES |
2739 |
- overlaps.sub.df = .calculatePeakGeneOverlaps(GRN, allPeaks = consensusPeaks, peak.TADs.df, neighborhoodSize = neighborhoodSize, |
|
2740 |
- genomeAssembly = genomeAssembly, gene.types = gene.types, overlapTypeGene = overlapTypeGene) |
|
2738 |
+ overlaps.sub.df = .calculatePeakGeneOverlaps(GRN, allPeaks = consensusPeaks, peak.TADs.df, |
|
2739 |
+ neighborhoodSize = neighborhoodSize, |
|
2740 |
+ genomeAssembly = genomeAssembly, |
|
2741 |
+ gene.types = gene.types, overlapTypeGene = overlapTypeGene) |
|
2741 | 2742 |
|
2742 | 2743 |
|
2743 | 2744 |
overlaps.sub.filt.df = overlaps.sub.df %>% |
2744 |
- dplyr::mutate(gene.ENSEMBL = gsub("\\..+", "", gene.ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs |
|
2745 |
+ dplyr::mutate(gene.ENSEMBL = gsub("\\..+", "", .data$gene.ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs |
|
2745 | 2746 |
|
2746 | 2747 |
# Only now we shuffle to make sure the background of possible connections is the same as in the foreground, as opposed to completely random |
2747 | 2748 |
# which would also include peak-gene connections that are not in the foreground at all |
... | ... |
@@ -2762,8 +2763,8 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2762 | 2763 |
|
2763 | 2764 |
permIndex = as.character(perm) |
2764 | 2765 |
|
2765 |
- countsPeaks.clean = dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -peakID) |
|
2766 |
- countsRNA.clean = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm)), -ENSEMBL) |
|
2766 |
+ countsPeaks.clean = dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -.data$peakID) |
|
2767 |
+ countsRNA.clean = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm)), -.data$ENSEMBL) |
|
2767 | 2768 |
|
2768 | 2769 |
# Cleverly construct the count matrices so we do the correlations in one go |
2769 | 2770 |
map_peaks = match(overlaps.sub.filt.df$peak.ID, getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID) |
... | ... |
@@ -2812,29 +2813,29 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2812 | 2813 |
res.df = suppressMessages(tibble::as_tibble(res.m) %>% |
2813 | 2814 |
dplyr::mutate(peak.ID = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID[map_peaks], |
2814 | 2815 |
gene.ENSEMBL = getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm))$ENSEMBL[map_rna]) %>% |
2815 |
- dplyr::filter(!is.na(gene.ENSEMBL)) %>% # For some peak-gene combinations, no RNA-Seq data was available, these NAs are filtered |
|
2816 |
+ dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% # For some peak-gene combinations, no RNA-Seq data was available, these NAs are filtered |
|
2816 | 2817 |
# Add gene annotation and distance |
2817 | 2818 |
dplyr::left_join(overlaps.sub.filt.df, by = c("gene.ENSEMBL", "peak.ID")) %>% |
2818 | 2819 |
# Integrate TAD IDs also |
2819 |
- dplyr::left_join(dplyr::select(peak.TADs.df, peak.ID, tad.ID), by = c("peak.ID")) %>% |
|
2820 |
+ dplyr::left_join(dplyr::select(peak.TADs.df, .data$peak.ID, .data$tad.ID), by = c("peak.ID")) %>% |
|
2820 | 2821 |
|
2821 | 2822 |
dplyr::select(tidyselect::all_of(selectColumns))) %>% |
2822 |
- dplyr::mutate(peak.ID = as.factor(peak.ID), |
|
2823 |
- gene.ENSEMBL = as.factor(gene.ENSEMBL), |
|
2824 |
- tad.ID = as.factor(tad.ID)) %>% |
|
2825 |
- dplyr::rename(peak_gene.r = r, |
|
2826 |
- peak_gene.p_raw = p.raw) |
|
2823 |
+ dplyr::mutate(peak.ID = as.factor(.data$peak.ID), |
|
2824 |
+ gene.ENSEMBL = as.factor(.data$gene.ENSEMBL), |
|
2825 |
+ tad.ID = as.factor(.data$tad.ID)) %>% |
|
2826 |
+ dplyr::rename(peak_gene.r = .data$r, |
|
2827 |
+ peak_gene.p_raw = .data$p.raw) |
|
2827 | 2828 |
|
2828 | 2829 |
if (addRobustRegression) { |
2829 | 2830 |
res.df = dplyr::rename(res.df, |
2830 |
- peak_gene.p_raw.robust = p_raw.robust, |
|
2831 |
- peak_gene.r_robust = r_robust, |
|
2832 |
- peak_gene.bias_M_p.raw = bias_M_p.raw, |
|
2833 |
- peak_gene.bias_LS_p.raw = bias_LS_p.raw) |
|
2831 |
+ peak_gene.p_raw.robust = .data$p_raw.robust, |
|
2832 |
+ peak_gene.r_robust = .data$r_robust, |
|
2833 |
+ peak_gene.bias_M_p.raw = .data$bias_M_p.raw, |
|
2834 |
+ peak_gene.bias_LS_p.raw = .data$bias_LS_p.raw) |
|
2834 | 2835 |
} |
2835 | 2836 |
|
2836 | 2837 |
if (is.null(TADs)) { |
2837 |
- res.df = dplyr::select(res.df, -tad.ID) |
|
2838 |
+ res.df = dplyr::select(res.df, -.data$tad.ID) |
|
2838 | 2839 |
} |
2839 | 2840 |
|
2840 | 2841 |
futile.logger::flog.info(paste0(" Finished. Final number of rows: ", nrow(res.df))) |
... | ... |
@@ -2905,17 +2906,17 @@ addConnections_peak_gene <- function(GRN, overlapTypeGene = "TSS", corMethod = " |
2905 | 2906 |
dataCur.df = tibble::tibble(data_Peaks =unlist(counts1 [map1[i],]), |
2906 | 2907 |
data_RNA = unlist(counts2 [map2[i],]) ) |
2907 | 2908 |
|
2908 |
- # TODO: perm not known here |
|
2909 |
+ # TODO: perm and GRN not known here |
|
2909 | 2910 |
stop("Not implemeneted yet") |
2910 |
- g = ggplot(dataCur.df, aes(data1, data2)) + geom_point() + geom_smooth(method ="lm") + |
|
2911 |
- ggtitle(paste0(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID[map1[i]], |
|
2912 |
- ", ", |
|
2913 |
- getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm))$ENSEMBL[map2 [i]], |
|
2914 |
- ", p = ", round(res$p.value, 3))) + |
|
2915 |
- theme_bw() |
|
2916 |
- plot(g) |
|
2917 |
- |
|
2918 |
- nPlotted = nPlotted + 1 |
|
2911 |
+ # g = ggplot2::ggplot(dataCur.df, ggplot2::aes(data1, data2)) + ggplot2::geom_point() + ggplot2::geom_smooth(method ="lm") + |
|
2912 |
+ # ggplot2::ggtitle(paste0(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID[map1[i]], |
|
2913 |
+ # ", ", |
|
2914 |
+ # getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(perm))$ENSEMBL[map2 [i]], |
|
2915 |
+ # ", p = ", round(res$p.value, 3))) + |
|
2916 |
+ # ggplot2::theme_bw() |
|
2917 |
+ # plot(g) |
|
2918 |
+ # |
|
2919 |
+ # nPlotted = nPlotted + 1 |
|
2919 | 2920 |
} |
2920 | 2921 |
|
2921 | 2922 |
if (debugMode_nPlots > 0 & nPlotted >= debugMode_nPlots){ |
... | ... |
@@ -3064,8 +3065,8 @@ filterGRNAndConnectGenes <- function(GRN, |
3064 | 3065 |
grn.filt = GRN@connections$TF_peaks[[permIndex]]$main %>% |
3065 | 3066 |
tibble::as_tibble() %>% |
3066 | 3067 |
dplyr::left_join(GRN@data$TFs$translationTable, by = c("TF.name")) %>% |
3067 |
- dplyr::select(-HOCOID, -ENSEMBL) %>% |
|
3068 |
- dplyr::mutate(TF.ENSEMBL = as.factor(TF.ENSEMBL)) |
|
3068 |
+ dplyr::select(-.data$HOCOID, -.data$ENSEMBL) %>% |
|
3069 |
+ dplyr::mutate(TF.ENSEMBL = as.factor(.data$TF.ENSEMBL)) |
|
3069 | 3070 |
|
3070 | 3071 |
if (is.null(grn.filt)) { |
3071 | 3072 |
message = "No TF-peak connections found. Run the function addConnections_TF_peak first" |
... | ... |
@@ -3189,7 +3190,7 @@ filterGRNAndConnectGenes <- function(GRN, |
3189 | 3190 |
checkmate::assertIntegerish(peak_gene.maxDistance, lower = 0) |
3190 | 3191 |
futile.logger::flog.info(paste0(" Filter peak-gene connections for their distance and keep only connections with a maximum distance of ", peak_gene.maxDistance)) |
3191 | 3192 |
futile.logger::flog.info(paste0(" Number of peak-gene rows before filtering connection types: ", nrow(peakGeneCorrelations))) |
3192 |
- peakGeneCorrelations = dplyr::filter(peakGeneCorrelations, peak_gene.distance < peak_gene.maxDistance) |
|
3193 |
+ peakGeneCorrelations = dplyr::filter(peakGeneCorrelations, .data$peak_gene.distance < peak_gene.maxDistance) |
|
3193 | 3194 |
futile.logger::flog.info(paste0(" Number of peak-gene rows after filtering connection types: ", nrow(peakGeneCorrelations))) |
3194 | 3195 |
} |
3195 | 3196 |
|
... | ... |
@@ -3365,9 +3366,9 @@ filterGRNAndConnectGenes <- function(GRN, |
3365 | 3366 |
dplyr::starts_with("peak_gene."), |
3366 | 3367 |
dplyr::starts_with("gene."), |
3367 | 3368 |
-dplyr::starts_with("peak.gene.")) %>% # these are the annotation columns by ChipSeeker that are confusing |
3368 |
- dplyr::mutate(peak.ID = as.factor(peak.ID), |
|
3369 |
- gene.ENSEMBL = as.factor(gene.ENSEMBL), |
|
3370 |
- TF.name = as.factor(TF.name)) |
|
3369 |
+ dplyr::mutate(peak.ID = as.factor(.data$peak.ID), |
|
3370 |
+ gene.ENSEMBL = as.factor(.data$gene.ENSEMBL), |
|
3371 |
+ TF.name = as.factor(.data$TF.name)) |
|
3371 | 3372 |
|
3372 | 3373 |
|
3373 | 3374 |
GRN@connections$all.filtered[[permIndex]] = grn.filt |
... | ... |
@@ -3482,27 +3483,27 @@ filterGRNAndConnectGenes <- function(GRN, |
3482 | 3483 |
# Plot No. 1 |
3483 | 3484 |
res.l$ihwPlots$estimatedWeights = |
3484 | 3485 |
IHW::plot(ihw_res, what = "weights") + |
3485 |
- ggtitle("Estimated weights") |
|
3486 |
+ ggplot2::ggtitle("Estimated weights") |
|
3486 | 3487 |
|
3487 | 3488 |
# Plot No. 2 |
3488 | 3489 |
res.l$ihwPlots$decisionBoundary = |
3489 | 3490 |
IHW::plot(ihw_res, what = "decisionboundary") + |
3490 |
- ggtitle("Decision boundary") |
|
3491 |
+ ggplot2::ggtitle("Decision boundary") |
|
3491 | 3492 |
|
3492 | 3493 |
# Plot No. 3 |
3493 | 3494 |
res.l$ihwPlots$rawVsAdjPVal_all <- |
3494 | 3495 |
as.data.frame(ihw_res) %>% |
3495 |
- ggplot(aes(x = .data$pvalue, y = adj_pvalue, col = group)) + |
|
3496 |
- geom_point(size = 0.25) + scale_colour_hue(l = 70, c = 150, drop = FALSE) + |
|
3497 |
- ggtitle("Raw versus adjusted p-values") |
|
3496 |
+ ggplot2::ggplot(ggplot2::aes(x = .data$pvalue, y = .data$adj_pvalue, col = .data$group)) + |
|
3497 |
+ ggplot2::geom_point(size = 0.25) + ggplot2::scale_colour_hue(l = 70, c = 150, drop = FALSE) + |
|
3498 |
+ ggplot2::ggtitle("Raw versus adjusted p-values") |
|
3498 | 3499 |
|
3499 | 3500 |
# TODO why hard-coded |
3500 | 3501 |
pValThreshold = 0.2 |
3501 | 3502 |
|
3502 | 3503 |
# Plot No. 4 |
3503 | 3504 |
res.l$ihwPlots$rawVsAdjPVal_subset = |
3504 |
- res.l$ihwPlots$rawVsAdjPVal_all %+% subset(IHW::as.data.frame(ihw_res), adj_pvalue <= pValThreshold) + |
|
3505 |
- ggtitle("raw versus adjusted p-values (zoom)") |
|
3505 |
+ res.l$ihwPlots$rawVsAdjPVal_all %+% subset(IHW::as.data.frame(ihw_res), .data$adj_pvalue <= pValThreshold) + |
|
3506 |
+ ggplot2::ggtitle("raw versus adjusted p-values (zoom)") |
|
3506 | 3507 |
|
3507 | 3508 |
## ## |
3508 | 3509 |
## 2. p-value histograms ## |
... | ... |
@@ -3515,8 +3516,8 @@ filterGRNAndConnectGenes <- function(GRN, |
3515 | 3516 |
# Plot No. 5 |
3516 | 3517 |
|
3517 | 3518 |
res.l$ihwPlots$pValHistogram = |
3518 |
- ggplot(data.df, aes(x = pValues)) + geom_histogram(binwidth = 0.025, boundary = 0) + |
|
3519 |
- ggtitle("p-Value histogram independent of the covariate") |
|
3519 |
+ ggplot2::ggplot(data.df, ggplot2::aes(x = .data$pValues)) + ggplot2::geom_histogram(binwidth = 0.025, boundary = 0) + |
|
3520 |
+ ggplot2::ggtitle("p-Value histogram independent of the covariate") |
|
3520 | 3521 |
|
3521 | 3522 |
# Stratified p-value histograms by covariate |
3522 | 3523 |
# Plot No. 6 |
... | ... |
@@ -3525,14 +3526,14 @@ filterGRNAndConnectGenes <- function(GRN, |
3525 | 3526 |
|
3526 | 3527 |
data.df$covariate_group <- IHW::groups_by_filter(data.df$covariate, nGroups) |
3527 | 3528 |
res.l$ihwPlots$pValHistogramGroupedByCovariate = |
3528 |
- ggplot(data.df, aes(x=pValues)) + geom_histogram(binwidth = 0.025, boundary = 0) + |
|
3529 |
- facet_wrap( ~covariate_group, nrow = 2) + |
|
3530 |
- ggtitle("p-Value histogram grouped by the covariate") |
|
3529 |
+ ggplot2::ggplot(data.df, ggplot2::aes(x=.data$pValues)) + ggplot2::geom_histogram(binwidth = 0.025, boundary = 0) + |
|
3530 |
+ ggplot2::facet_wrap( ~covariate_group, nrow = 2) + |
|
3531 |
+ ggplot2::ggtitle("p-Value histogram grouped by the covariate") |
|
3531 | 3532 |
|
3532 | 3533 |
# Plot No. 7 |
3533 | 3534 |
res.l$ihwPlots$pValHistogramGroupedByCovariateECDF = |
3534 |
- ggplot(data.df, aes(x = pValues, col = covariate_group)) + stat_ecdf(geom = "step") + |
|
3535 |
- ggtitle("p-Value histogram grouped by the covariate (ECDF)") |
|
3535 |
+ ggplot2::ggplot(data.df, ggplot2::aes(x = .data$pValues, col = .data$covariate_group)) + ggplot2::stat_ecdf(geom = "step") + |
|
3536 |
+ ggplot2::ggtitle("p-Value histogram grouped by the covariate (ECDF)") |
|
3536 | 3537 |
|
3537 | 3538 |
|
3538 | 3539 |
# Check whether the covariate is informative about power under the alternative (property 1), |
... | ... |
@@ -3542,8 +3543,8 @@ filterGRNAndConnectGenes <- function(GRN, |
3542 | 3543 |
|
3543 | 3544 |
# Plot No. 8 |
3544 | 3545 |
res.l$ihwPlots$pValAgainstRankCovariate = |
3545 |
- ggplot(data.df, aes(x= covariateRank, y = -log10(pValues))) + geom_hex(bins = 100) + |
|
3546 |
- ggtitle("p-Value against rank of the covariate") |
|
3546 |
+ ggplot2::ggplot(data.df, ggplot2::aes(x= .data$covariateRank, y = -log10(.data$pValues))) + ggplot2::geom_hex(bins = 100) + |
|
3547 |
+ ggplot2::ggtitle("p-Value against rank of the covariate") |
|
3547 | 3548 |
|
3548 | 3549 |
|
3549 | 3550 |
if (!is.null(pdfFile)) { |
... | ... |
@@ -3580,9 +3581,9 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress |
3580 | 3581 |
checkmate::assertClass(GRN, "GRN") |
3581 | 3582 |
GRN = .addFunctionLogToObject(GRN) |
3582 | 3583 |
checkmate::assertChoice(corMethod, c("pearson", "spearman")) |
3583 |
- assertFlag(addRobustRegression) |
|
3584 |
+ checkmate::assertFlag(addRobustRegression) |
|
3584 | 3585 |
checkmate::assertIntegerish(nCores, lower = 1) |
3585 |
- assertFlag(forceRerun) |
|
3586 |
+ checkmate::assertFlag(forceRerun) |
|
3586 | 3587 |
|
3587 | 3588 |
if (addRobustRegression) { |
3588 | 3589 |
packageMessage = paste0("The package robust is not installed, but needed here due to addRobustRegression = TRUE. Please install it and re-run this function or change addRobustRegression to FALSE.") |
... | ... |
@@ -3607,8 +3608,8 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress |
3607 | 3608 |
|
3608 | 3609 |
permIndex = as.character(permutationCur) |
3609 | 3610 |
TF_genePairs = GRN@connections$all.filtered[[permIndex]] %>% |
3610 |
- dplyr::filter(!is.na(gene.ENSEMBL)) %>% |
|
3611 |
- dplyr::select(TF.name, gene.ENSEMBL) %>% |
|
3611 |
+ dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% |
|
3612 |
+ dplyr::select(.data$TF.name, .data$gene.ENSEMBL) %>% |
|
3612 | 3613 |
dplyr::distinct() %>% |
3613 | 3614 |
dplyr::left_join(GRN@data$TFs$translationTable, by = c("TF.name"), suffix = c("", ".transl")) # %>% |
3614 | 3615 |
# dplyr::distinct(ENSEMBL, ENSEMBL.transl) |
... | ... |
@@ -3619,7 +3620,7 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress |
3619 | 3620 |
futile.logger::flog.info(paste0(" Iterate through ", maxRow, " TF-gene combinations and (if possible) calculate correlations using ", nCores, " cores. This may take a few minutes.")) |
3620 | 3621 |
|
3621 | 3622 |
|
3622 |
- countsRNA.clean = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur)), -ENSEMBL) |
|
3623 |
+ countsRNA.clean = dplyr::select(getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur)), -.data$ENSEMBL) |
|
3623 | 3624 |
|
3624 | 3625 |
map_TF = match(TF_genePairs$TF.ENSEMBL, getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur))$ENSEMBL) |
3625 | 3626 |
map_gene = match(TF_genePairs$gene.ENSEMBL, getCounts(GRN, type = "rna", norm = TRUE, permuted = as.logical(permutationCur))$ENSEMBL) |
... | ... |
@@ -3664,7 +3665,7 @@ add_TF_gene_correlation <- function(GRN, corMethod = "pearson", addRobustRegress |
3664 | 3665 |
TF.name = as.factor(.data$TF.name)) %>% |
3665 | 3666 |
dplyr::rename(TF_gene.r = .data$r, |
3666 | 3667 |
TF_gene.p_raw = .data$p.raw) %>% |
3667 |
- dplyr::select(TF.name, TF.ENSEMBL, gene.ENSEMBL, tidyselect::everything()) |
|
3668 |
+ dplyr::select(.data$TF.name, .data$TF.ENSEMBL, .data$gene.ENSEMBL, tidyselect::everything()) |
|
3668 | 3669 |
|
3669 | 3670 |
|
3670 | 3671 |
if (addRobustRegression) { |
... | ... |
@@ -3770,7 +3771,7 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe |
3770 | 3771 |
query_overlap_df = as.data.frame(S4Vectors::elementMetadata(peaks.gr)[query_row_ids, col_peakID]) |
3771 | 3772 |
subject_overlap_df = as.data.frame( SNPs.gr)[subject_row_ids, c("seqnames", "start")] |
3772 | 3773 |
|
3773 |
- overlaps.df = cbind.data.frame(query_overlap_df,subject_overlap_df) %>% dplyr::mutate(seqnames = as.character(seqnames)) |
|
3774 |
+ overlaps.df = cbind.data.frame(query_overlap_df,subject_overlap_df) %>% dplyr::mutate(seqnames = as.character(.data$seqnames)) |
|
3774 | 3775 |
colnames(overlaps.df) = c("peakID", "SNP_chr", "SNP_start") |
3775 | 3776 |
|
3776 | 3777 |
if (addAllColumns) { |
... | ... |
@@ -3785,13 +3786,13 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe |
3785 | 3786 |
|
3786 | 3787 |
colnamesNew = setdiff(colnames(overlaps.df), col_peakID) |
3787 | 3788 |
overlaps.summarized.df = overlaps.df %>% |
3788 |
- dplyr::group_by(peak) %>% |
|
3789 |
+ dplyr::group_by(.data$peak) %>% |
|
3789 | 3790 |
dplyr::summarise_at(colnamesNew, myfun) %>% |
3790 | 3791 |
dplyr::ungroup() |
3791 | 3792 |
|
3792 | 3793 |
# Add no of SNPs also as column |
3793 | 3794 |
overlaps.summarized.df = overlaps.summarized.df %>% |
3794 |
- dplyr::mutate(SNP_nOverlap = stringr::str_count(SNP_chr, stringr::fixed("|")) + 1) |
|
3795 |
+ dplyr::mutate(SNP_nOverlap = stringr::str_count(.data$SNP_chr, stringr::fixed("|")) + 1) |
|
3795 | 3796 |
|
3796 | 3797 |
grn.SNPs = dplyr::left_join(grn, overlaps.summarized.df, by = col_peakID) |
3797 | 3798 |
|
... | ... |
@@ -3827,7 +3828,7 @@ addSNPOverlap <- function(grn, SNPData, col_chr = "chr", col_pos = "pos", col_pe |
3827 | 3828 |
#' @param TF_peak.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. TF-peak FDR values to iterate over. |
3828 | 3829 |
#' @template TF_peak.connectionTypes |
3829 | 3830 |
#' @param peak_gene.fdr Numeric vector[0,1]. Default \code{c(0.001, 0.01, 0.05, 0.1, 0.2)}. Peak-gene FDR values to iterate over. |
3830 |
-#' @param peak_gene.p_raw Numeric vector[0,1]. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL. |
|
3831 |
+# #' @param peak_gene.p_raw Numeric vector[0,1]. Default \code{NULL}. Peak-gene raw p-value values to iterate over. Skipped if set to NULL. |
|
3831 | 3832 |
#' @param peak_gene.r_range Numeric vector of length 2[-1,1]. Default \code{c(0,1)}. The correlation range of peak-gene connections to keep. |
3832 | 3833 |
#' @template gene.types |
3833 | 3834 |
#' @param allowMissingGenes Logical vector of length 1 or 2. Default \code{c(FALSE, TRUE)}. Allow genes to be missing for peak-gene connections? If both \code{FALSE} and \code{TRUE} are given, the code loops over both |
... | ... |
@@ -3844,7 +3845,7 @@ generateStatsSummary <- function(GRN, |
3844 | 3845 |
TF_peak.fdr = c(0.001, 0.01, 0.05, 0.1, 0.2), |
3845 | 3846 |
TF_peak.connectionTypes = "all", |
3846 | 3847 |
peak_gene.fdr = c(0.001, 0.01, 0.05, 0.1, 0.2), |
3847 |
- peak_gene.p_raw = NULL, |
|
3848 |
+# peak_gene.p_raw = NULL, |
|
3848 | 3849 |
peak_gene.r_range = c(0,1), |
3849 | 3850 |
gene.types = c("protein_coding"), |
3850 | 3851 |
allowMissingGenes = c(FALSE, TRUE), |
... | ... |
@@ -3860,12 +3861,11 @@ generateStatsSummary <- function(GRN, |
3860 | 3861 |
checkmate::assertNumeric(TF_peak.fdr, lower = 0, upper = 1, min.len = 1) |
3861 | 3862 |
checkmate::assertSubset(TF_peak.connectionTypes, c("all", GRN@config$TF_peak_connectionTypes), empty.ok = FALSE) |
3862 | 3863 |
checkmate::assertNumeric(peak_gene.fdr, lower = 0, upper = 1, min.len = 1) |
3863 |
- checkmate::assert(checkmate::checkNull(peak_gene.p_raw), checkmate::checkNumeric(peak_gene.p_raw, lower = 0, upper = 1, min.len = 1)) |
|
3864 |
+ # checkmate::assert(checkmate::checkNull(peak_gene.p_raw), checkmate::checkNumeric(peak_gene.p_raw, lower = 0, upper = 1, min.len = 1)) |
|
3864 | 3865 |
checkmate::assertNumeric(peak_gene.r_range, lower = -1, upper = 1, len = 2) |
3865 | 3866 |
checkmate::assertSubset(gene.types, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
3866 | 3867 |
checkmate::assertSubset(allowMissingGenes, c(TRUE, FALSE)) |
3867 | 3868 |
checkmate::assertSubset(allowMissingTFs, c(TRUE, FALSE)) |
3868 |
- |
|
3869 | 3869 |
checkmate::assertFlag(forceRerun) |
3870 | 3870 |
|
3871 | 3871 |
if (is.null(GRN@stats$connections) | is.null(GRN@stats$connectionDetails.l) | forceRerun) { |
... | ... |
@@ -3959,35 +3959,40 @@ generateStatsSummary <- function(GRN, |
3959 | 3959 |
} # end of for (peak_gene.fdr_cur in peak_gene.fdr) |
3960 | 3960 |
|
3961 | 3961 |
|
3962 |
- if (!is.null(peak_gene.p_raw)) { |
|
3963 |
- |
|
3964 |
- futile.logger::flog.info(paste0(" Iterating over different peak-gene raw p-value thresholds...")) |
|
3965 |
- for (peak_gene.p_raw_cur in peak_gene.p_raw) { |
|
3966 |
- |
|
3967 |
- futile.logger::flog.info(paste0(" Peak-gene raw p-value = ", peak_gene.p_raw_cur)) |
|
3968 |
- |
|
3969 |
- # TODO: Add the other ones here also |
|
3970 |
- for (allowMissingGenesCur in allowMissingGenes) { |
|
3971 |
- |
|
3972 |
- GRN = filterGRNAndConnectGenes(GRN, |
|
3973 |
- TF_peak.fdr.threshold = TF_peak.fdr_cur, peak_gene.p_raw.threshold = peak_gene.p_raw_cur, |
|
3974 |
- peak_gene.fdr.threshold= NULL, |
|
3975 |
- gene.types = gene.types, |
|
3976 |
- allowMissingGenes = allowMissingGenesCur, peak_gene.r_range = peak_gene.r_range, |
|
3977 |
- filterTFs = NULL, filterGenes = NULL, filterPeaks = NULL, |
|
3978 |
- silent = TRUE) |
|
3979 |
- |
|
3980 |
- |
|
3981 |
- GRN@stats$connections = .addStats(GRN@stats$connections, GRN@connections$all.filtered[[permIndex]], |
|
3982 |
- perm = permutationCur, TF_peak.fdr = TF_peak.fdr_cur, peak_gene.fdr = NA, |
|
3983 |
- allowMissingGenes = allowMissingGenesCur, peak_gene.p_raw = peak_gene.p_raw_cur, |
|
3984 |
- peak_gene.r_range = paste0(peak_gene.r_range, collapse = ","), |
|
3985 |
- gene.types = paste0(gene.types, collapse = ",")) |
|
3986 |
- |
|
3987 |
- } # end of for (allowMissingGenesCur in allowMissingGenes) |
|
3988 |
- |
|
3989 |
- } # end of for (peak_gene.p_raw_cur in peak_gene.p_raw) |
|
3990 |
- } # end of if (!is.null(peak_gene.p_raw)) |
|
3962 |
+ # REMOVED FOR NOW, WAS INCOMPLETE AND NOT NEEDED ANYWAY |
|
3963 |
+ # if (!is.null(peak_gene.p_raw)) { |
|
3964 |
+ # |
|
3965 |
+ # futile.logger::flog.info(paste0(" Iterating over different peak-gene raw p-value thresholds...")) |
|
3966 |
+ # for (peak_gene.p_raw_cur in peak_gene.p_raw) { |
|
3967 |
+ # |
|
3968 |
+ # futile.logger::flog.info(paste0(" Peak-gene raw p-value = ", peak_gene.p_raw_cur)) |
|
3969 |
+ # |
|
3970 |
+ # # TODO: Add the other ones here also |
|
3971 |
+ # for (allowMissingGenesCur in allowMissingGenes) { |
|
3972 |
+ # |
|
3973 |
+ # GRN = filterGRNAndConnectGenes(GRN, |
|
3974 |
+ # TF_peak.fdr.threshold = TF_peak.fdr_cur, peak_gene.p_raw.threshold = peak_gene.p_raw_cur, |
|
3975 |
+ # peak_gene.fdr.threshold= NULL, |
|
3976 |
+ # gene.types = gene.types, |
|
3977 |
+ # allowMissingGenes = allowMissingGenesCur, peak_gene.r_range = peak_gene.r_range, |
|
3978 |
+ # filterTFs = NULL, filterGenes = NULL, filterPeaks = NULL, |
|
3979 |
+ # silent = TRUE) |
|
3980 |
+ # |
|
3981 |
+ # |
|
3982 |
+ # GRN@stats$connections = .addStats(GRN@stats$connections, GRN@connections$all.filtered[[permIndex]], |
|
3983 |
+ # perm = permutationCur, |
|
3984 |
+ # TF_peak.fdr = TF_peak.fdr_cur, TF_peak.connectionType = TF_peak.connectionTypeCur, |
|
3985 |
+ # peak_gene.fdr = NA, |
|
3986 |
+ # peak_gene.p_raw = peak_gene.p_raw_cur, |
|
3987 |
+ # peak_gene.r_range = paste0(peak_gene.r_range, collapse = ","), |
|
3988 |
+ # gene.types = paste0(gene.types, collapse = ","), |
|
3989 |
+ # allowMissingGenes = allowMissingGenesCur, |
|
3990 |
+ # allowMissingTFs = allowMissingTFsCur) |
|
3991 |
+ # |
|
3992 |
+ # } # end of for (allowMissingGenesCur in allowMissingGenes) |
|
3993 |
+ # |
|
3994 |
+ # } # end of for (peak_gene.p_raw_cur in peak_gene.p_raw) |
|
3995 |
+ # } # end of if (!is.null(peak_gene.p_raw)) |
|
3991 | 3996 |
|
3992 | 3997 |
|
3993 | 3998 |
} |
... | ... |
@@ -4010,11 +4015,11 @@ generateStatsSummary <- function(GRN, |
4010 | 4015 |
gene.types, |
4011 | 4016 |
allowMissingGenes, allowMissingTFs) { |
4012 | 4017 |
|
4013 |
- TF.stats = dplyr::select(connections.df, TF.name, peak.ID) %>% dplyr::filter(!is.na(peak.ID)) %>% dplyr::pull(TF.name) %>% as.character() %>% table() |
|
4014 |
- gene.stats = dplyr::select(connections.df, peak.ID, gene.ENSEMBL) %>% dplyr::filter(!is.na(gene.ENSEMBL)) %>% dplyr::pull(gene.ENSEMBL) %>% as.character() %>% table() |
|
4018 |
+ TF.stats = dplyr::select(connections.df, .data$TF.name, .data$peak.ID) %>% dplyr::filter(!is.na(.data$peak.ID)) %>% dplyr::pull(.data$TF.name) %>% as.character() %>% table() |
|
4019 |
+ gene.stats = dplyr::select(connections.df, .data$peak.ID, .data$gene.ENSEMBL) %>% dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% dplyr::pull(.data$gene.ENSEMBL) %>% as.character() %>% table() |
|
4015 | 4020 |
|
4016 |
- peak_gene.stats = dplyr::select(connections.df, peak.ID, gene.ENSEMBL) %>% dplyr::filter(!is.na(gene.ENSEMBL),!is.na(peak.ID)) %>% dplyr::pull(peak.ID) %>% as.character() %>% table() |
|
4017 |
- peak.TF.stats = dplyr::select(connections.df, peak.ID, TF.name) %>% dplyr::filter(!is.na(TF.name), !is.na(peak.ID)) %>% dplyr::pull(peak.ID) %>% as.character() %>% table() |
|
4021 |
+ peak_gene.stats = dplyr::select(connections.df, .data$peak.ID, .data$gene.ENSEMBL) %>% dplyr::filter(!is.na(.data$gene.ENSEMBL),!is.na(.data$peak.ID)) %>% dplyr::pull(.data$peak.ID) %>% as.character() %>% table() |
|
4022 |
+ peak.TF.stats = dplyr::select(connections.df, .data$peak.ID, .data$TF.name) %>% dplyr::filter(!is.na(.data$TF.name), !is.na(.data$peak.ID)) %>% dplyr::pull(.data$peak.ID) %>% as.character() %>% table() |
|
4018 | 4023 |
|
4019 | 4024 |
if (length(TF.stats) > 0){ |
4020 | 4025 |
TF.connections = c(min(TF.stats, na.rm = TRUE), |
... | ... |
@@ -4168,7 +4173,7 @@ loadExampleObject <- function(forceDownload = FALSE, fileURL = "https://www.embl |
4168 | 4173 |
GRN@config$files$output_log = "GRN.log" |
4169 | 4174 |
|
4170 | 4175 |
# Temporary fix: Replace lincRNA with lncRNA due to a recent change in biomaRt until we update the object directly |
4171 |
- GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(gene.type, lncRNA = "lincRNA")) |
|
4176 |
+ GRN@annotation$genes = dplyr::mutate(GRN@annotation$genes, gene.type = dplyr::recode_factor(.data$gene.type, lncRNA = "lincRNA")) |
|
4172 | 4177 |
|
4173 | 4178 |
GRN |
4174 | 4179 |
|
... | ... |
@@ -4215,7 +4220,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4215 | 4220 |
if (checkmate::testClass(countsOrig, "DESeqDataSet")) { |
4216 | 4221 |
result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("peakID") |
4217 | 4222 |
} else { |
4218 |
- result = countsOrig %>% dplyr::select(-isFiltered) %>% as.data.frame() |
|
4223 |
+ result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame() |
|
4219 | 4224 |
} |
4220 | 4225 |
|
4221 | 4226 |
} |
... | ... |
@@ -4233,7 +4238,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4233 | 4238 |
if (checkmate::testClass(countsOrig, "DESeqDataSet")) { |
4234 | 4239 |
result = DESeq2::counts(countsOrig, norm = FALSE) %>% as.data.frame() %>% tibble::rownames_to_column("ENSEMBL") |
4235 | 4240 |
} else { |
4236 |
- result = countsOrig %>% dplyr::select(-isFiltered) %>% as.data.frame() |
|
4241 |
+ result = countsOrig %>% dplyr::select(-.data$isFiltered) %>% as.data.frame() |
|
4237 | 4242 |
# TODO: isFiltered column? |
4238 | 4243 |
} |
4239 | 4244 |
|
... | ... |
@@ -4252,7 +4257,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4252 | 4257 |
|
4253 | 4258 |
# Columns are shuffled so that non-matching samples are compared throughout the pipeline for all correlation-based analyses |
4254 | 4259 |
result = GRN@data$RNA$counts_norm.l[["0"]][shuffledColumnOrder] %>% |
4255 |
- dplyr::select(ENSEMBL, tidyselect::everything()) |
|
4260 |
+ dplyr::select(.data$ENSEMBL, tidyselect::everything()) |
|
4256 | 4261 |
|
4257 | 4262 |
} else { |
4258 | 4263 |
message= "Could not find counts in object." |
... | ... |
@@ -4262,7 +4267,7 @@ getCounts <- function(GRN, type, norm, permuted = FALSE) { |
4262 | 4267 |
} |
4263 | 4268 |
|
4264 | 4269 |
if ("isFiltered" %in% colnames(result)) { |
4265 |
- result = dplyr::filter(result, !isFiltered) %>% dplyr::select(-isFiltered) |
|
4270 |
+ result = dplyr::filter(result, !.data$isFiltered) %>% dplyr::select(-.data$isFiltered) |
|
4266 | 4271 |
} |
4267 | 4272 |
|
4268 | 4273 |
result |
... | ... |
@@ -4388,11 +4393,11 @@ getGRNConnections <- function(GRN, type = "all.filtered", permuted = FALSE, inc |
4388 | 4393 |
} |
4389 | 4394 |
|
4390 | 4395 |
df %>% |
4391 |
- dplyr::mutate(TF.name = as.factor(TF.name ), |
|
4392 |
- TF_peak.r_bin = as.factor(TF_peak.r_bin), |
|
4393 |
- peak.ID = as.factor(peak.ID), |
|
4394 |
- TF_peak.fdr_direction = as.factor(TF_peak.fdr_direction), |
|
4395 |
- TF_peak.connectionType = as.factor(TF_peak.connectionType)) |
|
4396 |
+ dplyr::mutate(TF.name = as.factor(.data$TF.name ), |
|
4397 |
+ TF_peak.r_bin = as.factor(.data$TF_peak.r_bin), |
|
4398 |
+ peak.ID = as.factor(.data$peak.ID), |
|
4399 |
+ TF_peak.fdr_direction = as.factor(.data$TF_peak.fdr_direction), |
|
4400 |
+ TF_peak.connectionType = as.factor(.data$TF_peak.connectionType)) |
|
4396 | 4401 |
|
4397 | 4402 |
} |
4398 | 4403 |
|
... | ... |
@@ -4577,18 +4582,18 @@ getBasic_metadata_visualization <- function(GRN, forceRerun = FALSE) { |
4577 | 4582 |
|
4578 | 4583 |
if (is.null(GRN@visualization$metadata) | forceRerun) { |
4579 | 4584 |
|
4580 |
- expMeans.m = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE) %>% dplyr::select(-ENSEMBL) %>% as.matrix() |
|
4585 |
+ expMeans.m = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE) %>% dplyr::select(-.data$ENSEMBL) %>% as.matrix() |
|
4581 | 4586 |
|
4582 | 4587 |
baseMean = rowMeans(expMeans.m) |
4583 | 4588 |
expression.df = tibble::tibble(ENSEMBL_ID = getCounts(GRN, type = "rna", norm = TRUE, permuted = FALSE)$ENSEMBL, baseMean = baseMean) %>% |
4584 |
- dplyr::mutate(ENSEMBL_ID = gsub("\\..+", "", ENSEMBL_ID, perl = TRUE), |
|
4589 |
+ dplyr::mutate(ENSEMBL_ID = gsub("\\..+", "", .data$ENSEMBL_ID, perl = TRUE), |
|
4585 | 4590 |
baseMean_log = log2(baseMean + 0.01)) |
4586 | 4591 |
|
4587 |
- expression_TF.df = dplyr::filter(expression.df, ENSEMBL_ID %in% GRN@data$TFs$translationTable$ENSEMBL) %>% |
|
4592 |
+ expression_TF.df = dplyr::filter(expression.df, .data$ENSEMBL_ID %in% GRN@data$TFs$translationTable$ENSEMBL) %>% |
|
4588 | 4593 |
dplyr::left_join(GRN@data$TFs$translationTable, by = c("ENSEMBL_ID" = "ENSEMBL")) |
4589 | 4594 |
|
4590 | 4595 |
meanPeaks.df = tibble::tibble(peakID = getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE)$peakID, |
4591 |
- mean = rowMeans(dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -peakID))) %>% |
|
4596 |
+ mean = rowMeans(dplyr::select(getCounts(GRN, type = "peaks", norm = TRUE, permuted = FALSE), -.data$peakID))) %>% |
|
4592 | 4597 |
dplyr::mutate(mean_log = log2(mean + 0.01)) |
4593 | 4598 |
|
4594 | 4599 |
metadata = list("RNA_expression_genes" = expression.df, |
... | ... |
@@ -4627,7 +4632,7 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") { |
4627 | 4632 |
.createTables_peakGeneQC <- function(peakGeneCorrelations.all.cur, networkType_details, colors_vec, range) { |
4628 | 4633 |
|
4629 | 4634 |
d = peakGeneCorrelations.all.cur %>% |
4630 |
- dplyr::group_by(r_positive, class, peak_gene.p.raw.class) %>% |
|
4635 |
+ dplyr::group_by(.data$r_positive, class, .data$peak_gene.p.raw.class) %>% |
|
4631 | 4636 |
dplyr::summarise(n = dplyr::n()) %>% |
4632 | 4637 |
dplyr::ungroup() |
4633 | 4638 |
|
... | ... |
@@ -4650,34 +4655,34 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") { |
4650 | 4655 |
|
4651 | 4656 |
# Normalization factors |
4652 | 4657 |
dsum = d %>% |
4653 |
- dplyr::group_by(r_positive, class) %>% |
|
4658 |
+ dplyr::group_by(.data$r_positive, .data$class) %>% |
|
4654 | 4659 |
dplyr::summarise(sum_n = sum(.data$n)) |
4655 | 4660 |
|
4656 | 4661 |
|
4657 | 4662 |
# Summarize per bin |
4658 | 4663 |
d2 = d %>% |
4659 |
- dplyr::group_by(class, peak_gene.p.raw.class)%>% |
|
4660 |
- dplyr::summarise(sum_pos = .data$n[r_positive], |
|
4661 |
- sum_neg = .data$n[!r_positive]) %>% |
|
4662 |
- dplyr::mutate(ratio_pos_raw = sum_pos / sum_neg, |
|
4663 |
- enrichment_pos = sum_pos / (sum_pos + sum_neg), |
|
4664 |
- ratio_neg = 1 - enrichment_pos) %>% |
|
4664 |
+ dplyr::group_by(class, .data$peak_gene.p.raw.class)%>% |
|
4665 |
+ dplyr::summarise(sum_pos = .data$n[.data$r_positive], |
|
4666 |
+ sum_neg = .data$n[!.data$r_positive]) %>% |
|
4667 |
+ dplyr::mutate(ratio_pos_raw = .data$sum_pos / .data$sum_neg, |
|
4668 |
+ enrichment_pos = .data$sum_pos / (.data$sum_pos + .data$sum_neg), |
|
4669 |
+ ratio_neg = 1 - .data$enrichment_pos) %>% |
|
4665 | 4670 |
dplyr::ungroup() |
4666 | 4671 |
|
4667 | 4672 |
# Compare between real and permuted |
4668 |
- normFactor_real = dplyr::filter(dsum, class == !! (networkType_details[1])) %>% dplyr::pull(sum_n) %>% sum() / |
|
4669 |
- dplyr::filter(dsum, class == !! (networkType_details[2])) %>% dplyr::pull(sum_n) %>% sum() |
|
4673 |
+ normFactor_real = dplyr::filter(dsum, class == !! (networkType_details[1])) %>% dplyr::pull(.data$sum_n) %>% sum() / |
|
4674 |
+ dplyr::filter(dsum, class == !! (networkType_details[2])) %>% dplyr::pull(.data$sum_n) %>% sum() |
|
4670 | 4675 |
|
4671 | 4676 |
# ratio_norm not used currently, no normalization necessary here or not even useful because we dont want to normalize the r_pos and r_neg ratios: These are signal in a way. Only when comparing between real and permuted, we have to account for sample size for corrections |
4672 | 4677 |
d3 = d %>% |
4673 |
- dplyr::group_by(peak_gene.p.raw.class, r_positive) %>% |
|
4678 |
+ dplyr::group_by(.data$peak_gene.p.raw.class, .data$r_positive) %>% |
|
4674 | 4679 |
dplyr::summarise(n_real = .data$n[class == !! (names(colors_vec)[1]) ], |
4675 | 4680 |
n_permuted = .data$n[class == !! (names(colors_vec)[2]) ]) %>% |
4676 | 4681 |
dplyr::ungroup() %>% |
4677 |
- dplyr::mutate(ratio_real_raw = n_real / n_permuted, |
|
4678 |
- ratio_real_norm = ratio_real_raw / normFactor_real, |
|
4679 |
- enrichment_real = n_real / (n_real + n_permuted), |
|
4680 |
- enrichment_real_norm = (n_real / normFactor_real) / ((n_real / normFactor_real) + n_permuted)) |
|
4682 |
+ dplyr::mutate(ratio_real_raw = .data$n_real / .data$n_permuted, |
|
4683 |
+ ratio_real_norm = .data$ratio_real_raw / normFactor_real, |
|
4684 |
+ enrichment_real = .data$n_real / (.data$n_real + .data$n_permuted), |
|
4685 |
+ enrichment_real_norm = (.data$n_real / normFactor_real) / ((.data$n_real / normFactor_real) + .data$n_permuted)) |
|
4681 | 4686 |
|
4682 | 4687 |
|
4683 | 4688 |
stopifnot(identical(levels(d2$peak_gene.p.raw.class), levels(d3$peak_gene.p.raw.class))) |
... | ... |
@@ -4688,8 +4693,8 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") { |
4688 | 4693 |
ratio = c(d2$ratio_pos_raw, d3$ratio_real_norm), |
4689 | 4694 |
classAll = c(as.character(d2$class), d3$r_positive), |
4690 | 4695 |
set = c(d2$set, d3$set)) %>% |
4691 |
- dplyr::mutate(classAll = factor(classAll, levels=c(paste0("real_",range), paste0("random_",range), "TRUE", "FALSE")), |
|
4692 |
- peak_gene.p.raw.class = factor(peak_gene.p.raw.class, levels = levels(d2$peak_gene.p.raw.class))) |
|
4696 |
+ dplyr::mutate(classAll = factor(.data$classAll, levels=c(paste0("real_",range), paste0("random_",range), "TRUE", "FALSE")), |
|
4697 |
+ peak_gene.p.raw.class = factor(.data$peak_gene.p.raw.class, levels = levels(d2$peak_gene.p.raw.class))) |
|
4693 | 4698 |
|
4694 | 4699 |
d4 = tibble::tibble(peak_gene.p.raw.class = unique(d$peak_gene.p.raw.class), |
4695 | 4700 |
n_rpos_real = NA_integer_, n_rpos_random = NA_integer_, |
... | ... |
@@ -4713,13 +4718,13 @@ changeOutputDirectory <- function(GRN, outputDirectory = ".") { |
4713 | 4718 |
} |
4714 | 4719 |
|
4715 | 4720 |
d4 = d4 %>% |
4716 |
- dplyr::mutate(ratio_rneg_rpos_real = n_rneg_real / (n_rneg_real + n_rpos_real), |
|
4717 |
- ratio_rneg_rpos_random = n_rneg_random / (n_rneg_random + n_rpos_random), |
|
4718 |
- peak_gene.p.raw.class.bin = as.numeric(peak_gene.p.raw.class)) %>% |
|
4719 |
- dplyr::arrange(peak_gene.p.raw.class.bin) |
|
4721 |
+ dplyr::mutate(ratio_rneg_rpos_real = .data$n_rneg_real / (.data$n_rneg_real + .data$n_rpos_real), |
|
4722 |
+ ratio_rneg_rpos_random = .data$n_rneg_random / (.data$n_rneg_random + .data$n_rpos_random), |
|
4723 |
+ peak_gene.p.raw.class.bin = as.numeric(.data$peak_gene.p.raw.class)) %>% |
|
4724 |
+ dplyr::arrange(.data$peak_gene.p.raw.class.bin) |
|
4720 | 4725 |
|
4721 | 4726 |
d4_melt = reshape2::melt(d4, id = c("peak_gene.p.raw.class.bin", "peak_gene.p.raw.class")) %>% |
4722 |
- dplyr::filter(grepl("ratio", variable)) |
|
4727 |
+ dplyr::filter(grepl("ratio", .data$variable)) |
|
4723 | 4728 |
|
4724 | 4729 |
|
4725 | 4730 |
list(d = d, d2 = d2, d3 = d3, d4 = d4, d4_melt = d4_melt, d_merged = d_merged) |
... | ... |
@@ -191,7 +191,7 @@ |
191 | 191 |
.readHOCOMOCOTable <- function(file, delim = " ") { |
192 | 192 |
|
193 | 193 |
HOCOMOCO_mapping.df = .read_tidyverse_wrapper(file, type = "delim", delim = delim, col_types = readr::cols()) %>% |
194 |
- dplyr::mutate(ENSEMBL = gsub("\\..+", "", ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs |
|
194 |
+ dplyr::mutate(ENSEMBL = gsub("\\..+", "", .data$ENSEMBL, perl = TRUE)) # Clean ENSEMBL IDs |
|
195 | 195 |
|
196 | 196 |
checkmate::assertSubset(c("ENSEMBL", "HOCOID"), colnames(HOCOMOCO_mapping.df)) |
197 | 197 |
|
... | ... |
@@ -379,7 +379,7 @@ |
379 | 379 |
TFs_to_change = dplyr::filter(output.global.TFs, (!!as.name(colnameClassification) == "activator" | |
380 | 380 |
!!as.name(colnameClassification) == "repressor") & |
381 | 381 |
!!as.name( colnameClassificationPVal) > !!significanceThreshold_Wilcoxon) %>% |
382 |
- dplyr::pull(TF) |
|
382 |
+ dplyr::pull(.data$TF) |
|
383 | 383 |
|
384 | 384 |
# Filter some TFs to be undetermined |
385 | 385 |
if (length(TFs_to_change) > 0) { |
... | ... |
@@ -496,7 +496,7 @@ |
496 | 496 |
|
497 | 497 |
# Prepare the RNASeq data |
498 | 498 |
TF.specific = dplyr::left_join(HOCOMOCO_mapping, DESeq_results, by = "ENSEMBL") %>% |
499 |
- dplyr::filter(!is.na(baseMean)) |
|
499 |
+ dplyr::filter(!is.na(.data$baseMean)) |
|
500 | 500 |
|
501 | 501 |
if (nrow(TF.specific) == 0) { |
502 | 502 |
message = "The Ensembl IDs from the translation table do not match with the IDs from the RNA-seq counts table." |
... | ... |
@@ -510,7 +510,7 @@ |
510 | 510 |
output.global.TFs.merged = output.global.TFs %>% |
511 | 511 |
dplyr::filter(!!as.name(colnameClassificationCur) != "not-expressed") %>% |
512 | 512 |
dplyr::full_join(TF.specific, by = c( "TF" = "HOCOID")) %>% |
513 |
- dplyr::mutate(baseMeanNorm = (baseMean - min(baseMean, na.rm = TRUE)) / (max(baseMean, na.rm = TRUE) - min(baseMean, na.rm = TRUE)) + par.l$minPointSize) %>% |
|
513 |
+ dplyr::mutate(baseMeanNorm = (.data$baseMean - min(.data$baseMean, na.rm = TRUE)) / (max(.data$baseMean, na.rm = TRUE) - min(.data$baseMean, na.rm = TRUE)) + par.l$minPointSize) %>% |
|
514 | 514 |
dplyr::filter(!is.na(!!as.name(colnameClassificationCur))) |
515 | 515 |
|
516 | 516 |
|
... | ... |
@@ -531,11 +531,11 @@ |
531 | 531 |
signif(cor.res.l[["pearson"]]$p.value,2), "/", |
532 | 532 |
signif(cor.res.l[["spearman"]]$p.value,2), "\n(Pearson/Spearman, stringency: ", thresholdCur, ")") |
533 | 533 |
|
534 |
- g = ggplot(output.global.TFs.cur, aes(weighted_meanDifference, log2FoldChange)) + geom_point(aes(size = baseMeanNorm)) + |
|
535 |
- geom_smooth(method = par.l$regressionMethod, color = par.l$internal$colorCategories[classificationCur]) + |
|
536 |
- ggtitle(titleCur) + |
|
537 |
- ylab("log2 fold-change RNA-seq") + |
|
538 |
- theme_bw() + theme(plot.title = element_text(hjust = 0.5)) |
|
534 |
+ g = ggplot2::ggplot(output.global.TFs.cur, ggplot2::aes(.data$weighted_meanDifference, .data$log2FoldChange)) + ggplot2::geom_point(ggplot2::aes(size = .data$baseMeanNorm)) + |
|
535 |
+ ggplot2::geom_smooth(method = par.l$regressionMethod, color = par.l$internal$colorCategories[classificationCur]) + |
|
536 |
+ ggplot2::ggtitle(titleCur) + |
|
537 |
+ ggplot2::ylab("log2 fold-change RNA-seq") + |
|
538 |
+ ggplot2::theme_bw() + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) |
|
539 | 539 |
plot(g) |
540 | 540 |
|
541 | 541 |
} |
... | ... |
@@ -583,7 +583,7 @@ |
583 | 583 |
|
584 | 584 |
plot(median.cor.tfs.non, seq_len(nTF), |
585 | 585 |
xlim = xlim, ylim = ylim, main=mainCur, xlab=xlab, ylab=ylab, |
586 |
- col = grDevices::adjustcolor("darkgrey",alpha=1), pch = 16, cex = 0.5,axes = FALSE) |
|
586 |
+ col = grDevices::adjustcolor("darkgrey",alpha.f=1), pch = 16, cex = 0.5, axes = FALSE) |
|
587 | 587 |
points(median.cor.tfs, seq_len(nTF), |
588 | 588 |
pch=16, cex = 0.5, |
589 | 589 |
col= dplyr::case_when(median.cor.tfs > thresCur.v[2] ~ par.l$internal$colorCategories["activator"], |
... | ... |
@@ -640,7 +640,7 @@ |
640 | 640 |
|
641 | 641 |
missingGenes = which(!HOCOMOCO_mapping.df.exp$HOCOID %in% colnames(sort.cor.m)) |
642 | 642 |
if (length(missingGenes) > 0) { |
643 |
- HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df.exp, HOCOID %in% colnames(sort.cor.m)) |
|
643 |
+ HOCOMOCO_mapping.df.exp = dplyr::filter(HOCOMOCO_mapping.df.exp, .data$HOCOID %in% colnames(sort.cor.m)) |
|
644 | 644 |
} |
645 | 645 |
|
646 | 646 |
if (!is.null(file)) { |
... | ... |
@@ -36,22 +36,22 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
36 | 36 |
# If set to TRUE, the new default, self-loops can happen and the graph is not strictly tripartite anymore |
37 | 37 |
if (model_TF_gene_nodes_separately) { |
38 | 38 |
TF_peak.df = GRN@connections$all.filtered[["0"]] %>% |
39 |
- dplyr::filter(!is.na(gene.ENSEMBL)) %>% |
|
39 |
+ dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% |
|
40 | 40 |
dplyr::select(c("TF.name", "peak.ID", "TF.ENSEMBL", "TF_peak.r")) %>% |
41 | 41 |
stats::na.omit() %>% |
42 | 42 |
dplyr::mutate(V2_name = NA) %>% |
43 | 43 |
unique() %>% |
44 |
- dplyr::rename(V1 = TF.name, V2 = peak.ID, V1_name = TF.ENSEMBL, r = TF_peak.r) %>% |
|
44 |
+ dplyr::rename(V1 = .data$TF.name, V2 = .data$peak.ID, V1_name = .data$TF.ENSEMBL, r = .data$TF_peak.r) %>% |
|
45 | 45 |
dplyr::mutate_at(c("V1","V2"), as.vector) |
46 | 46 |
} else { |
47 | 47 |
# Get Ensembl ID for TFs here to make a clean join and force a TF that is regulated by a peak to be the same node |
48 | 48 |
TF_peak.df = GRN@connections$all.filtered[["0"]] %>% |
49 |
- dplyr::filter(!is.na(gene.ENSEMBL)) %>% |
|
49 |
+ dplyr::filter(!is.na(.data$gene.ENSEMBL)) %>% |
|
50 | 50 |
dplyr::select(c("TF.ENSEMBL", "peak.ID", "TF.name", "TF_peak.r")) %>% |
51 | 51 |
stats::na.omit() %>% |
52 | 52 |
dplyr::mutate(V2_name = NA) %>% |
53 | 53 |
unique() %>% |
54 |
- dplyr::rename(V1 = TF.ENSEMBL, V2 = peak.ID, V1_name = TF.name, r = TF_peak.r) %>% |
|
54 |
+ dplyr::rename(V1 = .data$TF.ENSEMBL, V2 = .data$peak.ID, V1_name = .data$TF.name, r = .data$TF_peak.r) %>% |
|
55 | 55 |
dplyr::mutate_at(c("V1","V2"), as.vector) |
56 | 56 |
} |
57 | 57 |
|
... | ... |
@@ -60,15 +60,15 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
60 | 60 |
stats::na.omit() %>% |
61 | 61 |
dplyr::mutate(V1_name = NA) %>% |
62 | 62 |
unique() %>% |
63 |
- dplyr::rename(V1 = peak.ID, V2 = gene.ENSEMBL, V2_name = gene.name, r = peak_gene.r) %>% |
|
63 |
+ dplyr::rename(V1 = .data$peak.ID, V2 = .data$gene.ENSEMBL, V2_name = .data$gene.name, r = .data$peak_gene.r) %>% |
|
64 | 64 |
dplyr::mutate_at(c("V1","V2"), as.vector) |
65 | 65 |
|
66 | 66 |
TF_peak_gene.df = dplyr::bind_rows(list(`tf-peak`=TF_peak.df, `peak-gene` = peak_gene.df), .id = "connectionType") %>% |
67 |
- dplyr::select(V1, V2, V1_name, V2_name, r, connectionType) |
|
67 |
+ dplyr::select(.data$V1, .data$V2, .data$V1_name, .data$V2_name, .data$r, .data$connectionType) |
|
68 | 68 |
|
69 | 69 |
TF_gene.df = dplyr::inner_join(TF_peak.df, peak_gene.df, by = c("V2"="V1"), suffix = c(".TF_peak", ".peak_gene")) %>% |
70 |
- dplyr::select(V1, V2.peak_gene, V1_name.TF_peak, V2_name.peak_gene) %>% |
|
71 |
- dplyr::rename(V1_name = V1_name.TF_peak, V2 = V2.peak_gene, V2_name = V2_name.peak_gene) %>% |
|
70 |
+ dplyr::select(.data$V1, .data$V2.peak_gene, .data$V1_name.TF_peak, .data$V2_name.peak_gene) %>% |
|
71 |
+ dplyr::rename(V1_name = .data$V1_name.TF_peak, V2 = .data$V2.peak_gene, V2_name = .data$V2_name.peak_gene) %>% |
|
72 | 72 |
dplyr::distinct() %>% |
73 | 73 |
dplyr::mutate(connectionType = "tf-gene") |
74 | 74 |
|
... | ... |
@@ -105,23 +105,23 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
105 | 105 |
.buildGraph <- function(df, directed, allowLoops, removeMultiple = FALSE, silent = FALSE) { |
106 | 106 |
|
107 | 107 |
# Remove V1_name and V2_name as igraph treats additional columns as edge attribute, which can become messed up as it here refers to vertice attribute |
108 |
- df_mod = df %>% dplyr::select(-V1_name, -V2_name) |
|
108 |
+ df_mod = df %>% dplyr::select(-.data$V1_name, -.data$V2_name) |
|
109 | 109 |
|
110 | 110 |
TF_vertices = df %>% |
111 |
- dplyr::select(V1, V1_name) %>% |
|
112 |
- dplyr::rename(nodeID = V1) %>% |
|
111 |
+ dplyr::select(.data$V1, .data$V1_name) %>% |
|
112 |
+ dplyr::rename(nodeID = .data$V1) %>% |
|
113 | 113 |
dplyr::distinct() %>% |
114 |
- dplyr::group_by(nodeID) %>% |
|
115 |
- dplyr::summarise(names_TF_all = paste0(V1_name, collapse="|"), |
|
114 |
+ dplyr::group_by(.data$nodeID) %>% |
|
115 |
+ dplyr::summarise(names_TF_all = paste0(.data$V1_name, collapse="|"), |
|
116 | 116 |
nTF = dplyr::n(), |
117 | 117 |
isTF = TRUE, .groups = "keep") %>% |
118 | 118 |
dplyr::ungroup() |
119 | 119 |
|
120 | 120 |
gene_vertices = df %>% |
121 |
- dplyr::select(V2, V2_name) %>% |
|
121 |
+ dplyr::select(.data$V2, .data$V2_name) %>% |
|
122 | 122 |
dplyr::distinct() %>% |
123 | 123 |
dplyr::mutate(isGene = TRUE) %>% |
124 |
- dplyr::rename(names_gene = V2_name, nodeID = V2) %>% |
|
124 |
+ dplyr::rename(names_gene = .data$V2_name, nodeID = .data$V2) %>% |
|
125 | 125 |
dplyr::ungroup() |
126 | 126 |
|
127 | 127 |
# Combine vertex metadata |
... | ... |
@@ -147,7 +147,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
147 | 147 |
|
148 | 148 |
} |
149 | 149 |
|
150 |
- .printGraphSummary(GRN, graph, silent = silent) |
|
150 |
+ .printGraphSummary(graph, silent = silent) |
|
151 | 151 |
|
152 | 152 |
if (!silent) futile.logger::flog.info(paste0(" Done. Graphs are saved in GRN@graph")) |
153 | 153 |
|
... | ... |
@@ -157,9 +157,9 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
157 | 157 |
.printLoopsGraph <- function(graph_table, silent = FALSE) { |
158 | 158 |
|
159 | 159 |
loop_vertices = graph_table %>% |
160 |
- dplyr::filter(V1 == V2) %>% |
|
161 |
- dplyr::mutate(V1_name_combined = paste0(V1, " (", V1_name, ")")) %>% |
|
162 |
- dplyr::pull(V1_name_combined) |
|
160 |
+ dplyr::filter(.data$V1 == .data$V2) %>% |
|
161 |
+ dplyr::mutate(V1_name_combined = paste0(.data$V1, " (", .data$V1_name, ")")) %>% |
|
162 |
+ dplyr::pull(.data$V1_name_combined) |
|
163 | 163 |
|
164 | 164 |
if (length(loop_vertices) > 0) { |
165 | 165 |
if (!silent) futile.logger::flog.info(paste0(" The following nodes / vertices have loop edges (TF regulating itself):\n", paste0(loop_vertices, collapse = ", "))) |
... | ... |
@@ -171,7 +171,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
171 | 171 |
|
172 | 172 |
|
173 | 173 |
multipleEdges = graph_table %>% |
174 |
- dplyr::group_by(V1,V2) %>% |
|
174 |
+ dplyr::group_by(.data$V1, .data$V2) %>% |
|
175 | 175 |
dplyr::summarize(n = dplyr::n(), .groups = "keep") %>% |
176 | 176 |
dplyr::ungroup() %>% |
177 | 177 |
dplyr::filter(.data$n>1) |
... | ... |
@@ -183,7 +183,7 @@ build_eGRN_graph <- function(GRN, model_TF_gene_nodes_separately = FALSE, |
183 | 183 |
} |
184 | 184 |
|
185 | 185 |
|
186 |
-.printGraphSummary <- function(GRN, graph, silent = FALSE) { |
|
186 |
+.printGraphSummary <- function(graph, silent = FALSE) { |
|
187 | 187 |
if (!silent) futile.logger::flog.info(paste0(" Graph summary:")) |
188 | 188 |
nVertices = length(igraph::V(graph)) |
189 | 189 |
nEdges = length(igraph::E(graph)) |
... | ... |
@@ -275,28 +275,41 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
275 | 275 |
|
276 | 276 |
|
277 | 277 |
# Retrieve set of background genes (as vector) used for enrichment analyses from a GRN object |
278 |
-.getBackgroundGenes <-function(GRN, type = "neighborhood") { |
|
278 |
+.getBackgroundGenes <-function(GRN, type = "neighborhood", gene.types = "all") { |
|
279 | 279 |
|
280 |
- checkmate::assertChoice(type, c("all_annotated", "all_RNA", "neighborhood"), empty.ok = FALSE) |
|
280 |
+ checkmate::assertChoice(type, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood")) |
|
281 | 281 |
|
282 | 282 |
if (type == "all_annotated") { |
283 | 283 |
|
284 |
- backgroundGenes = geneAnnotation[[GRN@config$parameters$genomeAssembly]]$gene.ENSEMBL |
|
285 |
- |
|
284 |
+ backgroundGenes = GRN@annotation$genes$gene.ENSEMBL |
|
285 |
+ |
|
286 | 286 |
} else if (type == "all_RNA") { |
287 | 287 |
|
288 |
- if (checkmate::testClass(GRN@data$RNA$counts_orig, "DESeqDataSet")) { |
|
289 |
- backgroundGenes = rownames(GRN@data$RNA$counts_orig) |
|
290 |
- } else { |
|
291 |
- backgroundGenes = GRN@data$RNA$counts_orig$ENSEMBL |
|
292 |
- } |
|
288 |
+ backgroundGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% |
|
289 |
+ dplyr::pull(.data$ENSEMBL) |
|
293 | 290 |
|
294 | 291 |
|
292 |
+ } else if (type == "all_RNA_filtered") { |
|
293 |
+ |
|
294 |
+ backgroundGenes = GRN@data$RNA$counts_norm.l[["0"]] %>% |
|
295 |
+ dplyr::filter(!.data$isFiltered) %>% |
|
296 |
+ dplyr::pull(.data$ENSEMBL) |
|
297 |
+ |
|
298 |
+ |
|
295 | 299 |
} else if (type == "neighborhood") { |
296 | 300 |
|
297 | 301 |
# Retrieve only those who are in the neighborhood of genes |
298 | 302 |
backgroundGenes = levels(GRN@connections$peak_genes[["0"]]$gene.ENSEMBL) |
299 | 303 |
} |
304 |
+ |
|
305 |
+ |
|
306 |
+ # Filter genes by gene.type. |
|
307 |
+ if (gene.types != "all") { |
|
308 |
+ backgroundGenes = dplyr::filter(GRN@annotation$genes, |
|
309 |
+ .data$gene.ENSEMBL %in% backgroundGenes, |
|
310 |
+ .data$gene.type %in% gene.types) %>% |
|
311 |
+ dplyr::pull(.data$gene.ENSEMBL) |
|
312 |
+ } |
|
300 | 313 |
|
301 | 314 |
|
302 | 315 |
backgroundGenes |
... | ... |
@@ -304,6 +317,7 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
304 | 317 |
} |
305 | 318 |
|
306 | 319 |
|
320 |
+ |
|
307 | 321 |
#' Run an enrichment analysis for the genes in the whole network in the filtered \code{\linkS4class{GRN}} object |
308 | 322 |
#' |
309 | 323 |
#' The enrichment analysis is based on the whole network, see \code{\link{calculateCommunitiesEnrichment}} and \code{\link{calculateTFEnrichment}} for |
... | ... |
@@ -323,7 +337,10 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
323 | 337 |
#' As they are listed under \code{Suggests}, they may not yet be installed, and the function will throw an error if they are missing. |
324 | 338 |
#' @param algorithm Character. Default \code{"weight01"}. One of: \code{"classic"}, \code{"elim"}, \code{"weight"}, \code{"weight01"}, \code{"lea"}, \code{"parentchild"}. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), ignored otherwise. Name of the algorithm that handles the GO graph structures. Valid inputs are those supported by the \code{topGO} library. |
325 | 339 |
#' @param statistic Character. Default \code{"fisher"}. One of: \code{"fisher"}, \code{"ks"}, \code{"t"}, \code{"globaltest"}, \code{"sum"}, \code{"ks.ties"}. Statistical test to be used. Only relevant if ontology is GO related (GO_BP, GO_MF, GO_CC), and valid inputs are those supported by the topGO library, ignored otherwise. For the other ontologies the test statistic is always Fisher. |
326 |
-#' @param background Character. Default \code{"neighborhood"}. One of: \code{"all_annotated"}, \code{"all_RNA"}, \code{"neighborhood"}. Set of genes to be used to construct the background for the enrichment analysis. This can either be all annotated genes in the reference genome (all_annotated), all differentially expressed genes (all_RNA), or all the genes that are within the neighborhood of a peak in the GRN (neighborhood) |
|
340 |
+#' @param background Character. Default \code{"neighborhood"}. One of: \code{"all_annotated"}, \code{"all_RNA"}, \code{"all_RNA_filtered"}, \code{"neighborhood"}. Set of genes to be used to construct the background for the enrichment analysis. This can either be all annotated genes in the reference genome (\code{all_annotated}), all genes from the provided RNA data (\code{all_RNA}), all genes from the provided RNA data excluding those marked as filtered after executing \code{filterData} (\code{all_RNA_filtered}), or all the genes that are within the neighborhood of any peak (before applying any filters except for the user-defined \code{promoterRange} value in \code{addConnections_peak_gene}) (\code{neighborhood}). |
|
341 |
+#' @param background_geneTypes Character vector of gene types that should be considered for the background. Default \code{"all"}. |
|
342 |
+#' Only gene types as defined in the \code{\linkS4class{GRN}} object, slot \code{GRN@annotation$genes$gene.type} are allowed. |
|
343 |
+#' The special keyword \code{"all"} means no filter on gene type. |
|
327 | 344 |
#' @param pAdjustMethod Character. Default \code{"BH"}. One of: \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{"bonferroni"}, \code{"BH"}, \code{"BY"}, \code{"fdr"}. This parameter is only relevant for the following ontologies: KEGG, DO, Reactome. For the other ontologies, the algorithm serves as an adjustment. |
328 | 345 |
#' @template forceRerun |
329 | 346 |
#' @return An updated \code{\linkS4class{GRN}} object, with the enrichment results stored in the \code{stats$Enrichment$general} slot. |
... | ... |
@@ -336,13 +353,10 @@ performAllNetworkAnalyses <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
336 | 353 |
#' GRN = loadExampleObject() |
337 | 354 |
#' GRN = calculateGeneralEnrichment(GRN, ontology = "GO_BP", forceRerun = FALSE) |
338 | 355 |
#' @export |
339 |
-# #' @importFrom topGO whichAlgorithms whichTests |
|
340 |
- |
|
341 |
-# Deactivated temporarily |
|
342 |
-# @import BiocManager |
|
343 | 356 |
calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
344 | 357 |
algorithm = "weight01", statistic = "fisher", |
345 |
- background = "neighborhood", pAdjustMethod = "BH", forceRerun = FALSE) { |
|
358 |
+ background = "neighborhood", background_geneTypes = "all", |
|
359 |
+ pAdjustMethod = "BH", forceRerun = FALSE) { |
|
346 | 360 |
|
347 | 361 |
start = Sys.time() |
348 | 362 |
checkmate::assertClass(GRN, "GRN") |
... | ... |
@@ -352,7 +366,8 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
352 | 366 |
|
353 | 367 |
.checkPackage_topGO_and_arguments(ontology, algorithm, statistic) |
354 | 368 |
|
355 |
- checkmate::assertChoice(background, c("all_annotated", "all_RNA", "neighborhood")) |
|
369 |
+ checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood")) |
|
370 |
+ checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
|
356 | 371 |
checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")) |
357 | 372 |
checkmate::assertFlag(forceRerun) |
358 | 373 |
|
... | ... |
@@ -360,7 +375,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
360 | 375 |
|
361 | 376 |
|
362 | 377 |
mapping = .getGenomeObject(GRN@config$parameters$genomeAssembly, type = "packageName") |
363 |
- backgroundGenes = .getBackgroundGenes(GRN, type = background) |
|
378 |
+ backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes) |
|
364 | 379 |
|
365 | 380 |
futile.logger::flog.info(paste0("Calculating general enrichment statistics. This may take a while")) |
366 | 381 |
|
... | ... |
@@ -375,7 +390,8 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
375 | 390 |
# run general enrichment analysis and store tabulated results in GRN object |
376 | 391 |
# Only use the "targets", i.e., genes as foreground because it would artificially enrich for TF terms, such as "DNA-binding" "transcription activation" type terms. |
377 | 392 |
GRN@stats[["Enrichment"]][["general"]][[ontologyCur]] = |
378 |
- .runEnrichment(foreground = GRN@graph$TF_gene$table$V2, |
|
393 |
+ .runEnrichment(GRN, |
|
394 |
+ foreground = GRN@graph$TF_gene$table$V2, |
|
379 | 395 |
background = backgroundGenes, |
380 | 396 |
backgroundStr = background, |
381 | 397 |
ontology = ontologyCur, algorithm = algorithm, statistic = statistic, |
... | ... |
@@ -401,25 +417,6 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
401 | 417 |
} |
402 | 418 |
|
403 | 419 |
|
404 |
-.getBackgroundGenes <-function(GRN, type = "neighborhood") { |
|
405 |
- # Retrieve set of background genes (as vector) used for enrichment analyses from a GRN object |
|
406 |
- checkmate::assertChoice(type, c("all_annotated", "all_RNA", "neighborhood")) |
|
407 |
- if (type == "all_annotated") { |
|
408 |
- backgroundGenes = geneAnnotation[[GRN@config$parameters$genomeAssembly]]$gene.ENSEMBL |
|
409 |
- } else if (type == "all_RNA") { |
|
410 |
- if (checkmate::testClass(GRN@data$RNA$counts_orig, "DESeqDataSet")) { |
|
411 |
- backgroundGenes = rownames(GRN@data$RNA$counts_orig) |
|
412 |
- } else { |
|
413 |
- backgroundGenes = GRN@data$RNA$counts_orig$ENSEMBL |
|
414 |
- } |
|
415 |
- } else if (type == "neighborhood") { |
|
416 |
- # Retrieve only those who are in the neighborhood of genes |
|
417 |
- backgroundGenes = levels(GRN@connections$peak_genes[["0"]]$gene.ENSEMBL) |
|
418 |
- } |
|
419 |
- backgroundGenes |
|
420 |
-} |
|
421 |
- |
|
422 |
- |
|
423 | 420 |
.combineEnrichmentResults <- function(GRN, type, ontology, p, nSignificant, display_pAdj) { |
424 | 421 |
|
425 | 422 |
if (type == "byCommunity") { |
... | ... |
@@ -433,7 +430,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
433 | 430 |
lapply(function(x) {x[[ontology]]$results}) %>% |
434 | 431 |
dplyr::bind_rows(.id = idMerge) %>% |
435 | 432 |
dplyr::select(-tidyselect::starts_with("topG")) %>% |
436 |
- dplyr::mutate(pval = as.numeric(pval)) %>% |
|
433 |
+ dplyr::mutate(pval = as.numeric(.data$pval)) %>% |
|
437 | 434 |
tibble::as_tibble()) |
438 | 435 |
|
439 | 436 |
# p-adjust only available for non-GO ontologies |
... | ... |
@@ -454,18 +451,18 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
454 | 451 |
|
455 | 452 |
# get enriched terms from general enrichment and make sure it is kept for the community enrichment |
456 | 453 |
enrichedTermsGeneral = enrichmentGeneral %>% |
457 |
- dplyr::filter(pval <= p, Found >= nSignificant) %>% |
|
458 |
- dplyr::pull(ID) |
|
454 |
+ dplyr::filter(.data$pval <= p, .data$Found >= nSignificant) %>% |
|
455 |
+ dplyr::pull(.data$ID) |
|
459 | 456 |
|
460 | 457 |
enrichedTermsGrouped = resultsCombined.df %>% |
461 |
- dplyr::filter(pval <= p, Found >= nSignificant) %>% |
|
462 |
- dplyr::pull(ID) |
|
458 |
+ dplyr::filter(.data$pval <= p, .data$Found >= nSignificant) %>% |
|
459 |
+ dplyr::pull(.data$ID) |
|
463 | 460 |
|
464 | 461 |
all.df = resultsCombined.df %>% |
465 | 462 |
rbind(enrichmentGeneral) %>% |
466 |
- dplyr::mutate(ID = as.factor(ID), |
|
467 |
- pval =as.numeric(gsub(">|<", "", pval))) %>% |
|
468 |
- dplyr::filter(pval <= p & (Found >= nSignificant | ID %in% c(enrichedTermsGeneral, enrichedTermsGrouped))) |
|
463 |
+ dplyr::mutate(ID = as.factor(.data$ID), |
|
464 |
+ pval =as.numeric(gsub(">|<", "", .data$pval))) %>% |
|
465 |
+ dplyr::filter(.data$pval <= p & (.data$Found >= nSignificant | .data$ID %in% c(enrichedTermsGeneral, enrichedTermsGrouped))) |
|
469 | 466 |
|
470 | 467 |
all.df[, idMerge] = as.factor(all.df[, idMerge, drop = TRUE]) |
471 | 468 |
|
... | ... |
@@ -493,7 +490,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
493 | 490 |
} |
494 | 491 |
|
495 | 492 |
#' @importFrom biomaRt useEnsembl getBM |
496 |
-.runEnrichment <- function(foreground, background, backgroundStr, database = "GO", ontology, |
|
493 |
+.runEnrichment <- function(GRN, foreground, background, backgroundStr, database = "GO", ontology, |
|
497 | 494 |
description = "Enrichment Analysis", |
498 | 495 |
algorithm="weight01", statistic = "fisher", mapping, pAdjustMethod = "BH", minGSSize = 0, maxGSSize = 5000){ |
499 | 496 |
|
... | ... |
@@ -591,8 +588,8 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
591 | 588 |
# Dont trim GO terms here, happens later when plotting |
592 | 589 |
result.tbl = unique(topGO::GenTable(go_enrichment, pval = result, orderBy = "pval", numChar = 1000, |
593 | 590 |
topNodes = length(topGO::score(result))) ) %>% |
594 |
- dplyr::rename(ID = GO.ID, Found = Significant) %>% # make it more clear what Significant refers to here |
|
595 |
- dplyr::mutate(GeneRatio = Found / nForeground) |
|
591 |
+ dplyr::rename(ID = .data$GO.ID, Found = .data$Significant) %>% # make it more clear what Significant refers to here |
|
592 |
+ dplyr::mutate(GeneRatio = .data$Found / nForeground) |
|
596 | 593 |
|
597 | 594 |
|
598 | 595 |
result.list[["results"]] = result.tbl |
... | ... |
@@ -720,7 +717,7 @@ calculateGeneralEnrichment <- function(GRN, ontology = c("GO_BP", "GO_MF"), |
720 | 717 |
if (!is.null(enrichmentObj)) { |
721 | 718 |
|
722 | 719 |
result.tbl = enrichmentObj@result %>% |
723 |
- dplyr::rename(Term = Description, pval = .data$pvalue, Found = Count) %>% |
|
720 |
+ dplyr::rename(Term = .data$Description, pval = .data$pvalue, Found = .data$Count) %>% |
|
724 | 721 |
dplyr::mutate(GeneRatio = sapply(parse (text = enrichmentObj@result$GeneRatio), eval)) |
725 | 722 |
|
726 | 723 |
} else { |
... | ... |
@@ -867,7 +864,8 @@ calculateCommunitiesStats <- function(GRN, clustering = "louvain", forceRerun = |
867 | 864 |
# #' @importFrom topGO whichAlgorithms whichTests |
868 | 865 |
calculateCommunitiesEnrichment <- function(GRN, |
869 | 866 |
ontology = c("GO_BP", "GO_MF"), algorithm = "weight01", |
870 |
- statistic = "fisher", background = "neighborhood", |
|
867 |
+ statistic = "fisher", |
|
868 |
+ background = "neighborhood", background_geneTypes = "all", |
|
871 | 869 |
selection = "byRank", communities = seq_len(10), |
872 | 870 |
pAdjustMethod = "BH", |
873 | 871 |
forceRerun = FALSE) { |
... | ... |
@@ -881,7 +879,8 @@ calculateCommunitiesEnrichment <- function(GRN, |
881 | 879 |
|
882 | 880 |
.checkPackage_topGO_and_arguments(ontology, algorithm, statistic) |
883 | 881 |
|
884 |
- checkmate::assertChoice(background, c("all_annotated", "all_RNA", "neighborhood")) |
|
882 |
+ checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood")) |
|
883 |
+ checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
|
885 | 884 |
checkmate::assertChoice(selection, c("byRank", "byLabel")) |
886 | 885 |
checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")) |
887 | 886 |
checkmate::assertFlag(forceRerun) |
... | ... |
@@ -914,7 +913,7 @@ calculateCommunitiesEnrichment <- function(GRN, |
914 | 913 |
|
915 | 914 |
|
916 | 915 |
mapping = .getGenomeObject(GRN@config$parameters$genomeAssembly, type = "packageName") |
917 |
- backgroundGenes = .getBackgroundGenes(GRN, type = background) |
|
916 |
+ backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes) |
|
918 | 917 |
|
919 | 918 |
if (is.null(GRN@stats[["Enrichment"]][["byCommunity"]])) { |
920 | 919 |
GRN@stats[["Enrichment"]][["byCommunity"]] = list() |
... | ... |
@@ -930,15 +929,16 @@ calculateCommunitiesEnrichment <- function(GRN, |
930 | 929 |
|
931 | 930 |
|
932 | 931 |
foregroundCur = vertexMetadata %>% |
933 |
- dplyr::filter(community == communityCur) %>% |
|
934 |
- dplyr::pull(name) |
|
932 |
+ dplyr::filter(.data$community == communityCur) %>% |
|
933 |
+ dplyr::pull(.data$name) |
|
935 | 934 |
|
936 | 935 |
for (ontologyCur in ontology) { |
937 | 936 |
|
938 | 937 |
if (is.null(GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]][[ontologyCur]]) | forceRerun) { |
939 | 938 |
|
940 | 939 |
GRN@stats[["Enrichment"]][["byCommunity"]][[communityCur]][[ontologyCur]] = |
941 |
- .runEnrichment(foreground = foregroundCur, |
|
940 |
+ .runEnrichment(GRN, |
|
941 |
+ foreground = foregroundCur, |
|
942 | 942 |
background = backgroundGenes, |
943 | 943 |
backgroundStr = background, |
944 | 944 |
ontology = ontologyCur, |
... | ... |
@@ -967,7 +967,7 @@ calculateCommunitiesEnrichment <- function(GRN, |
967 | 967 |
|
968 | 968 |
df = igraph::vertex.attributes(GRN@graph[[graph]]$graph) %>% |
969 | 969 |
as.data.frame() %>% |
970 |
- dplyr::count(community) |
|
970 |
+ dplyr::count(.data$community) |
|
971 | 971 |
|
972 | 972 |
|
973 | 973 |
if (is.null(communities)) { |
... | ... |
@@ -976,7 +976,7 @@ calculateCommunitiesEnrichment <- function(GRN, |
976 | 976 |
selCommunities = df %>% |
977 | 977 |
dplyr::arrange(dplyr::desc(.data$n)) %>% |
978 | 978 |
dplyr::slice(communities) %>% |
979 |
- dplyr::pull(community) %>% |
|
979 |
+ dplyr::pull(.data$community) %>% |
|
980 | 980 |
as.character() |
981 | 981 |
|
982 | 982 |
if (length(selCommunities) == 0) { |
... | ... |
@@ -1049,22 +1049,22 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network = |
1049 | 1049 |
if (rankType == "degree"){ |
1050 | 1050 |
col = dplyr::if_else(nodeType == "gene", "V2", "V1") |
1051 | 1051 |
topNodes = graph.df %>% |
1052 |
- dplyr::filter(connectionType == link) %>% |
|
1052 |
+ dplyr::filter(.data$connectionType == link) %>% |
|
1053 | 1053 |
dplyr::count(!!as.name(col), sort = TRUE) %>% |
1054 | 1054 |
# dplyr::rename(!!slot := V1, Connections = n) %>% |
1055 | 1055 |
dplyr::rename(Connections = n) %>% |
1056 |
- dplyr::arrange(dplyr::desc(Connections)) %>% |
|
1056 |
+ dplyr::arrange(dplyr::desc(.data$Connections)) %>% |
|
1057 | 1057 |
dplyr::slice(seq_len(top.n)) |
1058 | 1058 |
|
1059 | 1059 |
# TODO: change column names |
1060 | 1060 |
if (nodeType == "gene") { |
1061 | 1061 |
topNodes = topNodes %>% |
1062 |
- dplyr::left_join(graph.df %>% dplyr::select(V2, V2_name) %>% dplyr::distinct(), by = "V2") %>% |
|
1063 |
- dplyr::rename(gene.ENSEMBL = V2, gene.name = V2_name) |
|
1062 |
+ dplyr::left_join(graph.df %>% dplyr::select(.data$V2, .data$V2_name) %>% dplyr::distinct(), by = "V2") %>% |
|
1063 |
+ dplyr::rename(gene.ENSEMBL = .data$V2, gene.name = .data$V2_name) |
|
1064 | 1064 |
} else { |
1065 | 1065 |
topNodes = topNodes %>% |
1066 |
- dplyr::left_join(graph.df %>% dplyr::select(V1, V1_name) %>% dplyr::distinct(), by = "V1") %>% |
|
1067 |
- dplyr::rename(TF.ENSEMBL = V1, TF.name = V1_name) |
|
1066 |
+ dplyr::left_join(graph.df %>% dplyr::select(.data$V1, .data$V1_name) %>% dplyr::distinct(), by = "V1") %>% |
|
1067 |
+ dplyr::rename(TF.ENSEMBL = .data$V1, TF.name = .data$V1_name) |
|
1068 | 1068 |
} |
1069 | 1069 |
|
1070 | 1070 |
|
... | ... |
@@ -1076,7 +1076,7 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network = |
1076 | 1076 |
|
1077 | 1077 |
# Remove unnecessary extra column |
1078 | 1078 |
if ("name_plot" %in% colnames(topNodes)) { |
1079 |
- topNodes = dplyr::select(topNodes, -name_plot) |
|
1079 |
+ topNodes = dplyr::select(topNodes, -.data$name_plot) |
|
1080 | 1080 |
} |
1081 | 1081 |
|
1082 | 1082 |
.printExecutionTime(start) |
... | ... |
@@ -1108,7 +1108,8 @@ getTopNodes <- function(GRN, nodeType, rankType, n = 0.1, use_TF_gene_network = |
1108 | 1108 |
# #' @importFrom topGO whichAlgorithms whichTests |
1109 | 1109 |
calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NULL, |
1110 | 1110 |
ontology = c("GO_BP", "GO_MF"), algorithm = "weight01", |
1111 |
- statistic = "fisher", background = "neighborhood", |
|
1111 |
+ statistic = "fisher", |
|
1112 |
+ background = "neighborhood", background_geneTypes = "all", |
|
1112 | 1113 |
pAdjustMethod = "BH", |
1113 | 1114 |
forceRerun = FALSE){ |
1114 | 1115 |
|
... | ... |
@@ -1122,7 +1123,8 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU |
1122 | 1123 |
|
1123 | 1124 |
.checkPackage_topGO_and_arguments(ontology, algorithm, statistic) |
1124 | 1125 |
|
1125 |
- checkmate::assertChoice(background, c("all_annotated", "all_RNA", "neighborhood")) |
|
1126 |
+ checkmate::assertChoice(background, c("all_annotated", "all_RNA", "all_RNA_filtered", "neighborhood")) |
|
1127 |
+ checkmate::assertSubset(background_geneTypes, c("all", unique(as.character(GRN@annotation$genes$gene.type))) %>% stats::na.omit(), empty.ok = FALSE) |
|
1126 | 1128 |
checkmate::assertChoice(pAdjustMethod, c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")) |
1127 | 1129 |
checkmate::assertFlag(forceRerun) |
1128 | 1130 |
|
... | ... |
@@ -1138,7 +1140,7 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU |
1138 | 1140 |
} else{ |
1139 | 1141 |
|
1140 | 1142 |
# TF.name is always there, irrespective of whether ENSEMBL ID or TF name is used as primary ID type |
1141 |
- TFset = getTopNodes(GRN, nodeType = "TF", rankType = rankType, n, use_TF_gene_network = TRUE) %>% dplyr::pull(TF.name) |
|
1143 |
+ TFset = getTopNodes(GRN, nodeType = "TF", rankType = rankType, n, use_TF_gene_network = TRUE) %>% dplyr::pull(.data$TF.name) |
|
1142 | 1144 |
} |
1143 | 1145 |
|
1144 | 1146 |
# TODO: Continue working on the TF.name level or switch to Ensembl? Should be in concordance with the graph! |
... | ... |
@@ -1165,12 +1167,12 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU |
1165 | 1167 |
|
1166 | 1168 |
# get the genes associated with current top TF |
1167 | 1169 |
curGenes = GRN@connections$all.filtered$`0` %>% |
1168 |
- dplyr::filter(TF.name == TF) %>% |
|
1169 |
- dplyr::pull(gene.ENSEMBL) %>% |
|
1170 |
+ dplyr::filter(.data$TF.name == TF) %>% |
|
1171 |
+ dplyr::pull(.data$gene.ENSEMBL) %>% |
|
1170 | 1172 |
unique() |
1171 | 1173 |
|
1172 | 1174 |
|
1173 |
- backgroundGenes = .getBackgroundGenes(GRN, type = background) |
|
1175 |
+ backgroundGenes = .getBackgroundGenes(GRN, type = background, gene.types = background_geneTypes) |
|
1174 | 1176 |
|
1175 | 1177 |
for (ontologyCur in ontology) { |
1176 | 1178 |
|
... | ... |
@@ -1184,7 +1186,8 @@ calculateTFEnrichment <- function(GRN, rankType = "degree", n = 3, TF.names = NU |
1184 | 1186 |
|
1185 | 1187 |
|
1186 | 1188 |
GRN@stats[["Enrichment"]][["byTF"]][[TF]][[ontologyCur]] = |
1187 |
- .runEnrichment(foreground = curGenes, |
|
1189 |
+ .runEnrichment(GRN, |
|
1190 |
+ foreground = curGenes, |
|
1188 | 1191 |
background = backgroundGenes, |
1189 | 1192 |
backgroundStr = background, |
1190 | 1193 |
ontology = ontologyCur, |
... | ... |
@@ -1,3 +1,6 @@ |
1 |
+# TODO: tidyselect:::where has only been properly exported in August 2022, see here: https://github.com/r-lib/tidyselect/pull/274 |
|
2 |
+# As a workaround, as recommended, we use this here for now: |
|
3 |
+utils::globalVariables("where") |
|
1 | 4 |
|
2 | 5 |
############### PCA ############### |
3 | 6 |
|
... | ... |
@@ -18,8 +21,8 @@ |
18 | 21 |
#' @return An updated \code{\linkS4class{GRN}} object. |
19 | 22 |
#' @examples |
20 | 23 |
#' # See the Workflow vignette on the GRaNIE website for examples |
21 |
-#' # GRN = loadExampleObject() |
|
22 |
-#' # GRN = plotPCA_all(GRN, topn = 500, type = "rna", plotAsPDF = FALSE) |
|
24 |
+#' GRN = loadExampleObject() |
|
25 |
+#' GRN = plotPCA_all(GRN, topn = 500, data = "rna", type = "raw", plotAsPDF = FALSE, pages = 1) |
|
23 | 26 |
#' @export |
24 | 27 |
plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
25 | 28 |
data = c("rna", "peaks"), topn = c(500,1000,5000), type = c("raw", "normalized"), |
... | ... |
@@ -178,12 +181,12 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
178 | 181 |
} else if (transformation == "log2") { |
179 | 182 |
checkmate::assertMatrix(counts) |
180 | 183 |
counts.transf = log2(counts + 1) |
181 |
- metadata = GRN@data$metadata %>% dplyr::filter(sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame() |
|
184 |
+ metadata = GRN@data$metadata %>% dplyr::filter(.data$sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame() |
|
182 | 185 |
|
183 | 186 |
} else { |
184 | 187 |
checkmate::assertMatrix(counts) |
185 | 188 |
counts.transf = counts |
186 |
- metadata = GRN@data$metadata %>% dplyr::filter(sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame() |
|
189 |
+ metadata = GRN@data$metadata %>% dplyr::filter(.data$sampleID %in% colnames(counts.transf)) %>% tibble::column_to_rownames("sampleID") %>% as.data.frame() |
|
187 | 190 |
} |
188 | 191 |
|
189 | 192 |
# Override scaling when already vst-transformed |
... | ... |
@@ -232,18 +235,18 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
232 | 235 |
percentVar <- pca$sdev^2 / sum( pca$sdev^2 ) |
233 | 236 |
|
234 | 237 |
screeplot.df = tibble::tibble(PCs = factor(paste0("PC ", seq_len(nPCS)), levels = paste0("PC ", seq_len(nPCS))), variation = percentVar[seq_len(nPCS)] * 100) %>% |
235 |
- dplyr::mutate(variation_sum = cumsum(variation), |
|
238 |
+ dplyr::mutate(variation_sum = cumsum(.data$variation), |
|
236 | 239 |
pos = seq_len(nPCS)) |
237 | 240 |
|
238 | 241 |
# 1. Scree plot |
239 | 242 |
if (is.null(pages) | (!is.null(pages) && pageCounter %in% pages)) { |
240 | 243 |
|
241 |
- g = ggplot(screeplot.df, aes(PCs, variation)) + geom_bar(stat = "identity") + |
|
242 |
- xlab("Principal component (PC)") + ylab("Explained variation / Cumultative sum (in %)") + |
|
243 |
- geom_point(data = screeplot.df, aes(pos, variation_sum), color = "red") + |
|
244 |
- geom_line(data = screeplot.df, aes(pos, variation_sum), color = "red") + |
|
245 |
- ggtitle(paste0("Screeplot for top ", topn, " variable features")) + |
|
246 |
- theme_bw() |
|
244 |
+ g = ggplot2::ggplot(screeplot.df, ggplot2::aes(.data$PCs, .data$variation)) + ggplot2::geom_bar(stat = "identity") + |
|
245 |
+ ggplot2::xlab("Principal component (PC)") + ggplot2::ylab("Explained variation / Cumultative sum (in %)") + |
|
246 |
+ ggplot2::geom_point(data = screeplot.df, ggplot2::aes(.data$pos, .data$variation_sum), color = "red") + |
|
247 |
+ ggplot2::geom_line(data = screeplot.df, ggplot2::aes(.data$pos, .data$variation_sum), color = "red") + |
|
248 |
+ ggplot2::ggtitle(paste0("Screeplot for top ", topn, " variable features")) + |
|
249 |
+ ggplot2::theme_bw() |
|
247 | 250 |
|
248 | 251 |
plot(g) |
249 | 252 |
} |
... | ... |
@@ -290,24 +293,24 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
290 | 293 |
dplyr::mutate_if(is.logical, as.factor) |
291 | 294 |
|
292 | 295 |
|
293 |
- g = g = ggplot(data=d, aes(x=PC1, y=PC2, color=group)) + geom_point(size=3) + |
|
294 |
- xlab(paste0("PC1: ",round(percentVar[1] * 100, 1),"% variance")) + |
|
295 |
- ylab(paste0("PC2: ",round(percentVar[2] * 100, 1),"% variance")) + |
|
296 |
- ggtitle(plotTitle) + coord_fixed() |
|
296 |
+ g = g = ggplot2::ggplot(data=d, ggplot2::aes(x=.data$PC1, y=.data$PC2, color=group)) + ggplot2::geom_point(size=3) + |
|
297 |
+ ggplot2::xlab(paste0("PC1: ",round(percentVar[1] * 100, 1),"% variance")) + |
|
298 |
+ ggplot2::ylab(paste0("PC2: ",round(percentVar[2] * 100, 1),"% variance")) + |
|
299 |
+ ggplot2::ggtitle(plotTitle) + ggplot2::coord_fixed() |
|
297 | 300 |
|
298 | 301 |
|
299 | 302 |
|
300 | 303 |
if (is.factor(d$group)) { |
301 |
- g = g + scale_color_viridis_d() |
|
304 |
+ g = g + ggplot2::scale_color_viridis_d() |
|
302 | 305 |
|
303 | 306 |
} else { |
304 | 307 |
|
305 | 308 |
is.date <- function(x) inherits(x, 'Date') |
306 | 309 |
|
307 | 310 |
if (!is.date(d$group)) { |
308 |
- g = g + scale_color_viridis_c() |
|
311 |
+ g = g + ggplot2::scale_color_viridis_c() |
|
309 | 312 |
} else { |
310 |
- g = g + scale_color_viridis_c(trans = "date") |
|
313 |
+ g = g + ggplot2::scale_color_viridis_c(trans = "date") |
|
311 | 314 |
} |
312 | 315 |
|
313 | 316 |
|
... | ... |
@@ -315,7 +318,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
315 | 318 |
|
316 | 319 |
|
317 | 320 |
if (skipLegend) { |
318 |
- g = g + theme(legend.position = "none") |
|
321 |
+ g = g + ggplot2::theme(legend.position = "none") |
|
319 | 322 |
} |
320 | 323 |
|
321 | 324 |
plot(g) |
... | ... |
@@ -346,15 +349,15 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
346 | 349 |
data = log2(data+1) |
347 | 350 |
} |
348 | 351 |
|
349 |
- data.df = reshape2::melt(tibble::as_tibble(data), measure.vars = colnames(data)) %>% dplyr::rename(sample = variable) |
|
350 |
- g = ggplot(data.df, aes(value, color = sample)) + geom_density() + theme_bw() |
|
352 |
+ data.df = reshape2::melt(tibble::as_tibble(data), measure.vars = colnames(data)) %>% dplyr::rename(sample = .data$variable) |
|
353 |
+ g = ggplot2::ggplot(data.df, ggplot2::aes(.data$value, color = .data$sample)) + ggplot2::geom_density() + ggplot2::theme_bw() |
|
351 | 354 |
|
352 | 355 |
title = paste0("Density of values per sample for all features") |
353 | 356 |
if (! legend) { |
354 |
- g = g + theme(legend.position = "none") |
|
357 |
+ g = g + ggplot2::theme(legend.position = "none") |
|
355 | 358 |
title = paste0(title, " (Legend omitted, too many samples)") |
356 | 359 |
} |
357 |
- g = g + ggtitle(title) |
|
360 |
+ g = g + ggplot2::ggtitle(title) |
|
358 | 361 |
|
359 | 362 |
plot(g) |
360 | 363 |
|
... | ... |
@@ -498,7 +501,7 @@ plotPCA_all <- function(GRN, outputFolder = NULL, basenameOutput = NULL, |
498 | 501 |
#' @examples |
499 | 502 |
#' # See the Workflow vignette on the GRaNIE website for examples |
500 | 503 |
#' GRN = loadExampleObject() |
501 |
-#' GRN = plotDiagnosticPlots_TFPeaks(GRN, outputFolder = ".", dataType = "real", nTFMax = 2) |
|
504 |
+#' GRN = plotDiagnosticPlots_TFPeaks(GRN, outputFolder = ".", dataType = "real", nTFMax = 2, pages = 1) |
|
502 | 505 |
#' @export |
503 | 506 |
plotDiagnosticPlots_TFPeaks <- function(GRN, |
504 | 507 |
outputFolder = NULL, |
... | ... |
@@ -583,10 +586,10 @@ plotDiagnosticPlots_TFPeaks <- function(GRN, |
583 | 586 |
.generateTF_GC_diagnosticPlots <- function(TFCur, GC_classes_foreground.df, GC_classes_background.df, GC_classes_all.df, peaksForeground, peaksBackground, peaksBackgroundGC) { |
584 | 587 |
|
585 | 588 |
GC_classes_background_GC.df = peaksBackgroundGC %>% |
586 |
- dplyr::group_by(GC_class) %>% |
|
587 |
- dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(peak_width), peak_width_sd = sd(peak_width)) %>% |
|
589 |
+ dplyr::group_by(.data$GC_class) %>% |
|
590 |
+ dplyr::summarise(n= dplyr::n(), peak_width_mean = mean(.data$peak_width), peak_width_sd = sd(.data$peak_width)) %>% |
|
588 | 591 |
dplyr::ungroup() %>% |
589 |
- tidyr::complete(GC_class, fill = list(n = 0)) %>% |
|
592 |
+ tidyr::complete(.data$GC_class, fill = list(n = 0)) %>% |
|
590 | 593 |
dplyr::mutate(n_rel = .data$n / nrow(peaksBackgroundGC), type = "background_GC") |
591 | 594 |
|
592 | 595 |
# TODO |
... | ... |
@@ -594,7 +597,7 @@ plotDiagnosticPlots_TFPeaks <- function(GRN, |
594 | 597 |
dplyr::mutate(type = forcats::fct_relevel(.data$type, "foreground", "background_GC", "background_orig")) %>% |
595 | 598 |
dplyr::left_join(GC_classes_all.df, by = "GC_class") %>% |
596 | 599 |
dplyr::rowwise() %>% |
597 |
- dplyr::mutate(n.bg.needed.relFreq = dplyr::if_else(round(n.bg.needed.perc * 100, 0) > 100, "", paste0(as.character(round(n.bg.needed.perc * 100, 0)),"%"))) |
|
600 |
+ dplyr::mutate(n.bg.needed.relFreq = dplyr::if_else(round(.data$n.bg.needed.perc * 100, 0) > 100, "", paste0(as.character(round(.data$n.bg.needed.perc * 100, 0)),"%"))) |
|
598 | 601 |
|
599 | 602 |
# Current hack: Put those numbers where the no. of peaks in the GC background < the required one (i.e., where resampling occured) |
600 | 603 |
GC_classes_all2.df$n.bg.needed.relFreq[GC_classes_all2.df$type != "background_GC" | GC_classes_all2.df$n == 0] = "" |
... | ... |
@@ -606,28 +609,28 @@ plotDiagnosticPlots_TFPeaks <- function(GRN, |
606 | 609 |
labelVec = c("background_orig" = label_background_orig, "background_GC" = label_background_GC, "foreground" = label_foreground) |
607 | 610 |
colorVec = c("background_orig" = "lightgray", "background_GC" = "darkgray", "foreground" = "black") |
608 | 611 |
|
609 |
- g1 = ggplot(GC_classes_all2.df , aes(GC_class, n_rel, group = .data$type, fill = .data$type, label = n.bg.needed.relFreq)) + |
|
610 |
- geom_bar(stat = "identity", position = position_dodge(width = 0.5), width = 0.5) + |
|
611 |
- geom_text(vjust=-0.5, size = 3) + |
|
612 |
- ylim(0,0.75) + |
|
613 |
- #geom_line(aes(color= type), size = 2) + |
|
614 |
- scale_fill_manual(name = "Peakset origin", labels = labelVec, values = colorVec) + |
|
615 |
- #scale_color_manual(name = "Signal", labels = labelVec, values = colorVec) + |
|
616 |
- theme_bw() + ggtitle(paste0(TFCur, ": Before and after GC-adjustment")) + |
|
617 |
- xlab("GC class from peaks") + |
|
618 |
- ylab("Relative frequencies") + |
|
619 |
- theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10)) |
|
620 |
- |
|
621 |