git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/chromstaR@122816 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: chromstaR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data |
4 |
-Version: 1.1.0 |
|
5 |
-Date: 2014-05-03 |
|
4 |
+Version: 1.1.1 |
|
5 |
+Date: 2016-10-19 |
|
6 | 6 |
Author: Aaron Taudt, Maria Colome Tatche, Matthias Heinig, Minh Anh Nguyen |
7 | 7 |
Maintainer: Aaron Taudt <aaron.taudt@gmail.com> |
8 | 8 |
Description: This package implements functions for combinatorial and differential analysis of ChIP-seq data. It includes uni- and multivariate peak-calling, export to genome browser viewable files, and functions for enrichment analyses. |
... | ... |
@@ -6,17 +6,17 @@ export(binReads) |
6 | 6 |
export(callPeaksMultivariate) |
7 | 7 |
export(callPeaksReplicates) |
8 | 8 |
export(callPeaksUnivariate) |
9 |
-export(changePostCutoff) |
|
9 |
+export(changeFDR) |
|
10 | 10 |
export(collapseBins) |
11 | 11 |
export(combineMultivariates) |
12 | 12 |
export(dec2bin) |
13 |
-export(exportBinnedData) |
|
14 |
-export(exportCombinedMultivariate) |
|
13 |
+export(exportCombinations) |
|
14 |
+export(exportCounts) |
|
15 | 15 |
export(exportGRangesAsBedFile) |
16 |
-export(exportMultivariate) |
|
17 |
-export(exportUnivariates) |
|
16 |
+export(exportPeaks) |
|
18 | 17 |
export(fixedWidthBins) |
19 | 18 |
export(genomicFrequencies) |
19 |
+export(getCombinations) |
|
20 | 20 |
export(getDistinctColors) |
21 | 21 |
export(getStateColors) |
22 | 22 |
export(heatmapCombinations) |
... | ... |
@@ -24,9 +24,9 @@ export(heatmapCountCorrelation) |
24 | 24 |
export(heatmapTransitionProbs) |
25 | 25 |
export(loadHmmsFromFiles) |
26 | 26 |
export(plotEnrichCountHeatmap) |
27 |
+export(plotEnrichment) |
|
27 | 28 |
export(plotExpression) |
28 | 29 |
export(plotFoldEnrichHeatmap) |
29 |
-export(plotFoldEnrichment) |
|
30 | 30 |
export(plotHistogram) |
31 | 31 |
export(readBamFileAsGRanges) |
32 | 32 |
export(readBedFileAsGRanges) |
33 | 33 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+CHANGES IN VERSION 1.1.1 |
|
2 |
+------------------------ |
|
3 |
+ |
|
4 |
+NEW FEATURES |
|
5 |
+ |
|
6 |
+ o Selection of peaks can be done with 'changeFDR'. |
|
7 |
+ |
|
8 |
+ o Peak calls are available in each chromstaR-object as list entry '$peaks'. |
|
9 |
+ |
|
10 |
+SIGNIFICANT USER-LEVEL CHANGES |
|
11 |
+ |
|
12 |
+ o 'plotFoldEnrichment' renamed to 'plotEnrichment'. |
|
13 |
+ |
|
14 |
+ o 'exportBinnedData', 'exportUnivariates', 'exportMultivariates', 'exportCombinedMultivariates' replaced by 'exportCounts', 'exportPeaks', 'exportCombinations'. |
|
15 |
+ |
|
16 |
+BUG FIXES |
|
17 |
+ |
|
18 |
+ o Proper computation of fold enrichments, with < 1 indicating depletion and > 1 indicating enrichment. |
|
19 |
+ |
|
20 |
+DEPRECATED AND DEFUNCT |
|
21 |
+ |
|
22 |
+ o 'changePostCutoff'. |
|
23 |
+ |
|
24 |
+ o 'plotFoldEnrichment'. |
|
25 |
+ |
|
26 |
+ o 'exportBinnedData', 'exportUnivariates', 'exportMultivariates', 'exportCombinedMultivariates'. |
... | ... |
@@ -11,8 +11,7 @@ |
11 | 11 |
#' @param per.chrom If \code{per.chrom=TRUE} chromosomes will be treated separately. This tremendously speeds up the calculation but results might be noisier as compared to \code{per.chrom=FALSE}, where all chromosomes are concatenated for the HMM. |
12 | 12 |
#' @param chromosomes A vector specifying the chromosomes to use from the models in \code{hmms}. The default (\code{NULL}) uses all available chromosomes. |
13 | 13 |
#' @param eps Convergence threshold for the Baum-Welch algorithm. |
14 |
-#' @param post.cutoff False discovery rate. The default \code{NULL} means that the state with maximum posterior probability will be chosen, irrespective of its absolute probability. |
|
15 |
-#' @param keep.posteriors If set to \code{TRUE}, posteriors will be available in the output. This is useful to change the post.cutoff later, but increases the necessary disk space to store the result immense. |
|
14 |
+#' @param keep.posteriors If set to \code{TRUE}, posteriors will be available in the output. This can be useful to change the posterior cutoff later, but increases the necessary disk space to store the result immensely. |
|
16 | 15 |
#' @param num.threads Number of threads to use. Setting this to >1 may give increased performance. |
17 | 16 |
#' @param max.time The maximum running time in seconds for the Baum-Welch algorithm. If this time is reached, the Baum-Welch will terminate after the current iteration finishes. The default \code{NULL} is no limit. |
18 | 17 |
#' @param max.iter The maximum number of iterations for the Baum-Welch algorithm. The default \code{NULL} is no limit. |
... | ... |
@@ -50,7 +49,7 @@ |
50 | 49 |
#'heatmapTransitionProbs(multimodel) |
51 | 50 |
#'heatmapCountCorrelation(multimodel) |
52 | 51 |
#' |
53 |
-callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=TRUE, chromosomes=NULL, eps=0.01, post.cutoff=NULL, keep.posteriors=TRUE, num.threads=1, max.time=NULL, max.iter=NULL, keep.densities=FALSE, verbosity=1) { |
|
52 |
+callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=TRUE, chromosomes=NULL, eps=0.01, keep.posteriors=FALSE, num.threads=1, max.time=NULL, max.iter=NULL, keep.densities=FALSE, verbosity=1) { |
|
54 | 53 |
|
55 | 54 |
## Intercept user input |
56 | 55 |
if (!is.null(use.states)) { |
... | ... |
@@ -71,9 +70,6 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T |
71 | 70 |
if (check.logical(keep.posteriors)!=0) stop("argument 'keep.posteriors' expects a logical (TRUE or FALSE)") |
72 | 71 |
if (check.logical(keep.densities)!=0) stop("argument 'keep.densities' expects a logical (TRUE or FALSE)") |
73 | 72 |
if (check.integer(verbosity)!=0) stop("argument 'verbosity' expects an integer") |
74 |
- if (!is.null(post.cutoff)) { |
|
75 |
- if (post.cutoff>1 | post.cutoff<0) stop("argument 'post.cutoff' has to be between 0 and 1 if specified") |
|
76 |
- } |
|
77 | 73 |
if (length(hmms)==0) { |
78 | 74 |
stop("argument 'hmms' is of length=0. Cannot call multivariate peaks with no models.") |
79 | 75 |
} |
... | ... |
@@ -90,9 +86,6 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T |
90 | 86 |
result$bins$state <- factor(c(0,0,1))[hmm$bins$state] |
91 | 87 |
result$bins$posteriors <- matrix(hmm$bins$posterior.modified, ncol=1, dimnames=list(NULL, result$info$ID)) |
92 | 88 |
result$bins$differential.score <- 0 |
93 |
- if (!is.null(post.cutoff)) { |
|
94 |
- result$bins$state <- factor(as.integer(result$bins$posteriors >= post.cutoff)) |
|
95 |
- } |
|
96 | 89 |
## Add combinations |
97 | 90 |
mapping <- NULL |
98 | 91 |
if (!is.null(use.states)) { |
... | ... |
@@ -125,8 +118,6 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T |
125 | 118 |
# Distributions |
126 | 119 |
result$distributions <- list(hmm$distributions) |
127 | 120 |
names(result$distributions) <- result$info$ID |
128 |
- # post.cutoff |
|
129 |
- result$post.cutoff <- post.cutoff |
|
130 | 121 |
## Convergence info |
131 | 122 |
convergenceInfo <- list(eps=NA, loglik=NA, loglik.delta=NA, num.iterations=NA, time.sec=NA) |
132 | 123 |
result$convergenceInfo <- convergenceInfo |
... | ... |
@@ -167,7 +158,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T |
167 | 158 |
ptm <- startTimedMessage("Running multivariate ...") |
168 | 159 |
models <- foreach (chrom = chromosomes, .packages='chromstaR') %dopar% { |
169 | 160 |
bins <- p$bins[seqnames(p$bins)==chrom] |
170 |
- model <- runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, post.cutoff=post.cutoff, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) |
|
161 |
+ model <- runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) |
|
171 | 162 |
model |
172 | 163 |
} |
173 | 164 |
stopTimedMessage(ptm) |
... | ... |
@@ -176,7 +167,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T |
176 | 167 |
for (chrom in chromosomes) { |
177 | 168 |
ptm <- startTimedMessage("Chromosome = ", chrom, "\n") |
178 | 169 |
bins <- p$bins[seqnames(p$bins)==chrom] |
179 |
- model <- suppressMessages( runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, post.cutoff=post.cutoff, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) ) |
|
170 |
+ model <- suppressMessages( runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) ) |
|
180 | 171 |
message("Time spent for chromosome = ", chrom, ":", appendLF=FALSE) |
181 | 172 |
stopTimedMessage(ptm) |
182 | 173 |
models[[chrom]] <- model |
... | ... |
@@ -191,7 +182,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T |
191 | 182 |
## Run multivariate for all chromosomes |
192 | 183 |
} else { |
193 | 184 |
|
194 |
- model <- runMultivariate(bins=p$bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=num.threads, post.cutoff=post.cutoff, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) |
|
185 |
+ model <- runMultivariate(bins=p$bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=num.threads, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) |
|
195 | 186 |
|
196 | 187 |
} |
197 | 188 |
|
... | ... |
@@ -200,7 +191,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T |
200 | 191 |
} |
201 | 192 |
|
202 | 193 |
|
203 |
-runMultivariate <- function(bins, info, comb.states, use.states, distributions, weights, correlationMatrix, correlationMatrixInverse, determinant, max.iter, max.time, eps, num.threads, post.cutoff, keep.posteriors, keep.densities, transitionProbs.initial=NULL, startProbs.initial=NULL, verbosity=1) { |
|
194 |
+runMultivariate <- function(bins, info, comb.states, use.states, distributions, weights, correlationMatrix, correlationMatrixInverse, determinant, max.iter, max.time, eps, num.threads, keep.posteriors, keep.densities, transitionProbs.initial=NULL, startProbs.initial=NULL, verbosity=1) { |
|
204 | 195 |
|
205 | 196 |
ptm <- startTimedMessage("Starting multivariate HMM with ", length(comb.states), " combinatorial states") |
206 | 197 |
message("") |
... | ... |
@@ -273,6 +264,12 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
273 | 264 |
result$info <- info |
274 | 265 |
## Bin coordinates, posteriors and states |
275 | 266 |
result$bins <- bins |
267 |
+ if (!is.null(use.states$state)) { |
|
268 |
+ state.levels <- levels(use.states$state) |
|
269 |
+ } else { |
|
270 |
+ state.levels <- hmm$comb.states |
|
271 |
+ } |
|
272 |
+ result$bins$state <- factor(hmm$states, levels=state.levels) |
|
276 | 273 |
if (get.posteriors) { |
277 | 274 |
ptm <- startTimedMessage("Transforming posteriors to `per sample` representation ...") |
278 | 275 |
hmm$posteriors <- matrix(hmm$posteriors, ncol=hmm$max.states) |
... | ... |
@@ -281,20 +278,11 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
281 | 278 |
post.per.track <- hmm$posteriors %*% binstates |
282 | 279 |
colnames(post.per.track) <- result$info$ID |
283 | 280 |
result$bins$posteriors <- post.per.track |
284 |
- result$bins$differential.score <- differentialScoreSum(result$bins$posteriors, result$info) |
|
281 |
+ result$bins$peakScores <- getPeakScores(result$bins) |
|
282 |
+ result$bins$differential.score <- differentialScoreSum(result$bins$peakScores, result$info) |
|
285 | 283 |
stopTimedMessage(ptm) |
286 | 284 |
} |
287 | 285 |
ptm <- startTimedMessage("Calculating states from posteriors ...") |
288 |
- if (!is.null(use.states$state)) { |
|
289 |
- state.levels <- levels(use.states$state) |
|
290 |
- } else { |
|
291 |
- state.levels <- hmm$comb.states |
|
292 |
- } |
|
293 |
- if (!is.null(post.cutoff)) { |
|
294 |
- result$bins$state <- factor(bin2dec(result$bins$posteriors >= post.cutoff), levels=state.levels) |
|
295 |
- } else { |
|
296 |
- result$bins$state <- factor(hmm$states, levels=state.levels) |
|
297 |
- } |
|
298 | 286 |
stopTimedMessage(ptm) |
299 | 287 |
if (keep.densities) { |
300 | 288 |
result$bins$densities <- matrix(hmm$densities, ncol=hmm$max.states) |
... | ... |
@@ -314,6 +302,16 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
314 | 302 |
if (!keep.posteriors) { |
315 | 303 |
result$bins$posteriors <- NULL |
316 | 304 |
} |
305 |
+ ## Peaks |
|
306 |
+ result$peaks <- list() |
|
307 |
+ for (i1 in 1:ncol(result$segments$peakScores)) { |
|
308 |
+ mask <- result$segments$peakScores[,i1] > 0 |
|
309 |
+ peaks <- result$segments[mask] |
|
310 |
+ mcols(peaks) <- NULL |
|
311 |
+ peaks$peakScores <- result$segments$peakScores[mask,i1] |
|
312 |
+ result$peaks[[i1]] <- peaks |
|
313 |
+ } |
|
314 |
+ names(result$peaks) <- colnames(result$segments$peakScores) |
|
317 | 315 |
## Parameters |
318 | 316 |
result$mapping <- mapping |
319 | 317 |
combinations <- mapping[as.character(comb.states)] |
... | ... |
@@ -337,8 +335,6 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
337 | 335 |
# Distributions |
338 | 336 |
result$distributions <- distributions |
339 | 337 |
names(result$distributions) <- result$info$ID |
340 |
- # post.cutoff |
|
341 |
- result$post.cutoff <- post.cutoff |
|
342 | 338 |
## Convergence info |
343 | 339 |
convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec) |
344 | 340 |
result$convergenceInfo <- convergenceInfo |
... | ... |
@@ -420,7 +420,6 @@ callPeaksUnivariateAllChr <- function(binned.data, input.data=NULL, eps=0.01, in |
420 | 420 |
ranges=ranges(binned.data), |
421 | 421 |
counts=hmm$counts, |
422 | 422 |
state=states) |
423 |
- result$bins$score <- apply(hmm$posteriors, 1, max) |
|
424 | 423 |
result$bins$posteriors <- hmm$posteriors |
425 | 424 |
if (!control) { |
426 | 425 |
result$bins$posterior.modified <- hmm$posteriors[,'modified'] |
... | ... |
@@ -430,17 +429,22 @@ callPeaksUnivariateAllChr <- function(binned.data, input.data=NULL, eps=0.01, in |
430 | 429 |
} |
431 | 430 |
seqlengths(result$bins) <- seqlengths(binned.data) |
432 | 431 |
stopTimedMessage(ptm) |
432 |
+ ## Peak score as maximum posterior in that peak |
|
433 |
+ result$bins$peakScores <- getPeakScore.univariate(result$bins) |
|
433 | 434 |
## Segmentation |
434 | 435 |
ptm <- startTimedMessage("Making segmentation ...") |
435 | 436 |
df <- as.data.frame(result$bins) |
436 |
- red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2getMax=c('score'), columns2drop=c('width',grep('posterior', names(df), value=TRUE), 'counts'))) |
|
437 |
- red.gr <- GRanges(seqnames=red.df[,1], ranges=IRanges(start=red.df[,2], end=red.df[,3]), strand=red.df[,4], state=red.df[,'state'], score=red.df[,'max.score']) |
|
438 |
- result$segments <- red.gr |
|
437 |
+ red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2drop=c('width',grep('posterior', names(df), value=TRUE), 'counts'))) |
|
438 |
+ result$segments <- methods::as(red.df, 'GRanges') |
|
439 | 439 |
seqlengths(result$segments) <- seqlengths(binned.data)[seqlevels(result$segments)] |
440 | 440 |
if (!keep.posteriors) { |
441 | 441 |
result$bins$posteriors <- NULL |
442 | 442 |
} |
443 | 443 |
stopTimedMessage(ptm) |
444 |
+ ## Peaks |
|
445 |
+ result$peaks <- result$segments[result$segments$state == 'modified'] |
|
446 |
+ result$peaks$state <- NULL |
|
447 |
+ result$segments <- NULL |
|
444 | 448 |
## Parameters |
445 | 449 |
# Weights |
446 | 450 |
result$weights <- hmm$weights |
447 | 451 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,199 @@ |
1 |
+#' Change the false discovery rate of a Hidden Markov Model |
|
2 |
+#' |
|
3 |
+#' Adjusts the peak calls of a \code{\link{uniHMM}} or \code{\link{multiHMM}} object with the given false discovery rate. |
|
4 |
+#' |
|
5 |
+#' Each peak has a peak score associated (between 0 and 1). The false discovery rate is a simple cutoff on 1-peakScore, e.g. for an \code{fdr = 0.01} only peaks with a peak score \code{>= 0.99} (1-fdr) will be selected. |
|
6 |
+#' |
|
7 |
+#' @author Aaron Taudt |
|
8 |
+#' @param model A \code{\link{uniHMM}} or \code{\link{multiHMM}} object with posteriors. |
|
9 |
+#' @param fdr A vector of values between 0 and 1 for each column in \code{model$bins$peakScores}. If only one value is given, it will be reused for all columns. Values close to 0 will yield more stringent peak calls with lower false positive but higher false negative rate. |
|
10 |
+#' @param invert Select peaks below (\code{FDR}) or above (\code{TRUE}) the given \code{fdr}. This is useful to select low confidence peaks. |
|
11 |
+#' @return The input object is returned with adjusted peak calls. |
|
12 |
+#' @export |
|
13 |
+#' @examples |
|
14 |
+#'## Get an example BAM file |
|
15 |
+#'file <- system.file("extdata", "euratrans", |
|
16 |
+#' "lv-H3K27me3-BN-male-bio2-tech1.bam", |
|
17 |
+#' package="chromstaRData") |
|
18 |
+#'## Bin the file into bin size 1000bp |
|
19 |
+#'data(rn4_chrominfo) |
|
20 |
+#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000, |
|
21 |
+#' chromosomes='chr12') |
|
22 |
+#'## Fit the univariate Hidden Markov Model |
|
23 |
+#'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1) |
|
24 |
+#'## Compare fits with different fdrs |
|
25 |
+#'plotHistogram(hmm) + ylim(0,0.25) + ylim(0,0.3) |
|
26 |
+#'plotHistogram(changeFDR(hmm, fdr=0.01)) + ylim(0,0.3) |
|
27 |
+#'plotHistogram(changeFDR(hmm, fdr=1e-12)) + ylim(0,0.3) |
|
28 |
+#' |
|
29 |
+changeFDR <- function(model, fdr=0.01, invert=FALSE) { |
|
30 |
+ |
|
31 |
+ if (!is.numeric(fdr)) { |
|
32 |
+ warning("'fdr' is not numeric. Nothing done.") |
|
33 |
+ return(model) |
|
34 |
+ } |
|
35 |
+ if (is(model, class.univariate.hmm)) { |
|
36 |
+ model.new <- changeFDR.univariate(model=model, fdr=fdr, invert=invert) |
|
37 |
+ } else if (is(model, class.multivariate.hmm) | is(model, class.combined.multivariate.hmm)) { |
|
38 |
+ model.new <- changeFDR.multivariate(model=model, fdr=fdr, invert=invert) |
|
39 |
+ } |
|
40 |
+ return(model.new) |
|
41 |
+ |
|
42 |
+} |
|
43 |
+ |
|
44 |
+ |
|
45 |
+changeFDR.multivariate <- function(model, fdr, invert=FALSE) { |
|
46 |
+ |
|
47 |
+ ## Make fdr vector |
|
48 |
+ if (is.null(model$bins$peakScores)) stop("Cannot recalculate states because peakScores are missing.") |
|
49 |
+ numcol <- ncol(model$bins$peakScores) |
|
50 |
+ if (length(fdr) == 1) { |
|
51 |
+ fdr <- rep(fdr, numcol) |
|
52 |
+ } |
|
53 |
+ if (length(fdr) != numcol) { |
|
54 |
+ stop("Need ", numcol, " values in 'fdr' but only ", length(fdr), " are provided.") |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ for (fdr.i in fdr) { |
|
58 |
+ if (fdr.i < 0 | fdr.i > 1) { |
|
59 |
+ stop("Values for 'fdr' need to be inside the interval [0,1].") |
|
60 |
+ } |
|
61 |
+ } |
|
62 |
+ names(fdr) <- colnames(model$bins$peakScores) |
|
63 |
+ |
|
64 |
+ # Check if replicates have the same fdr value |
|
65 |
+ reps <- sub('-rep.*', '', model$info$ID) |
|
66 |
+ if (any(sapply(split(fdr, reps), function(x) { Reduce('&', x==x[1]) }) == FALSE)) { |
|
67 |
+ stop("Replicates must have the same fdr value.") |
|
68 |
+ } |
|
69 |
+ |
|
70 |
+ ## fdr threshold |
|
71 |
+ threshold <- 1-fdr |
|
72 |
+ |
|
73 |
+ ### Multivariate HMM ### |
|
74 |
+ if (is(model, class.multivariate.hmm)) { |
|
75 |
+ ## Calculate states |
|
76 |
+ ptm <- startTimedMessage("Calculating states from peakScores ...") |
|
77 |
+ p <- model$bins$peakScores |
|
78 |
+ p.thresholded <- matrix(FALSE, ncol=ncol(p), nrow=nrow(p)) |
|
79 |
+ for (icol in 1:ncol(p)) { |
|
80 |
+ if (!invert) { |
|
81 |
+ p.thresholded[,icol] <- p[,icol] >= threshold[icol] |
|
82 |
+ } else { |
|
83 |
+ p.thresholded[,icol] <- p[,icol] < threshold[icol] & p[,icol] > 0 |
|
84 |
+ } |
|
85 |
+ } |
|
86 |
+ states <- factor(bin2dec(p.thresholded), levels=levels(model$bins$state)) |
|
87 |
+ model$bins$state <- states |
|
88 |
+ ## Combinations |
|
89 |
+ if (!is.null(model$bins$combination)) { |
|
90 |
+ mapping <- model$mapping |
|
91 |
+ model$bins$combination <- factor(mapping[as.character(model$bins$state)], levels=mapping[as.character(levels(model$bins$state))]) |
|
92 |
+ } |
|
93 |
+ stopTimedMessage(ptm) |
|
94 |
+ ## Redo segmentation |
|
95 |
+ model$segments <- multivariateSegmentation(model$bins, column2collapseBy='state') |
|
96 |
+ ## Redo peaks |
|
97 |
+ model$peaks <- list() |
|
98 |
+ for (i1 in 1:ncol(model$segments$peakScores)) { |
|
99 |
+ mask <- model$segments$peakScores[,i1] > 0 |
|
100 |
+ peaks <- model$segments[mask] |
|
101 |
+ mcols(peaks) <- NULL |
|
102 |
+ peaks$peakScores <- model$segments$peakScores[mask,i1] |
|
103 |
+ model$peaks[[i1]] <- peaks |
|
104 |
+ } |
|
105 |
+ names(model$peaks) <- colnames(model$segments$peakScores) |
|
106 |
+ |
|
107 |
+ } else if (is(model, class.combined.multivariate.hmm)) { |
|
108 |
+ mapping.df <- stateBrewer(model$info[,setdiff(names(model$info), 'ID')], mode='full') |
|
109 |
+ mapping <- mapping.df$combination |
|
110 |
+ names(mapping) <- mapping.df$state |
|
111 |
+ p <- model$bins$peakScores |
|
112 |
+ p.thresholded <- matrix(FALSE, ncol=ncol(p), nrow=nrow(p)) |
|
113 |
+ for (icol in 1:ncol(p)) { |
|
114 |
+ if (!invert) { |
|
115 |
+ p.thresholded[,icol] <- p[,icol] >= threshold[icol] |
|
116 |
+ } else { |
|
117 |
+ p.thresholded[,icol] <- p[,icol] < threshold[icol] & p[,icol] > 0 |
|
118 |
+ } |
|
119 |
+ } |
|
120 |
+ states <- factor(bin2dec(p.thresholded), levels=mapping.df$state) |
|
121 |
+ ## Make fake multiHMM |
|
122 |
+ multiHMM <- list() |
|
123 |
+ class(multiHMM) <- class.multivariate.hmm |
|
124 |
+ multiHMM$info <- model$info |
|
125 |
+ multiHMM$bins <- model$bins |
|
126 |
+ multiHMM$bins$combination <- mapping[as.character(states)] |
|
127 |
+ multiHMM$bins$state <- factor(states) |
|
128 |
+ multiHMM$bins$posteriors <- model$bins$posteriors |
|
129 |
+ multiHMM$bins$peakScores <- p |
|
130 |
+ multiHMM$mapping <- mapping |
|
131 |
+ multiHMM$peaks <- model$peaks |
|
132 |
+ model <- combineMultivariates(list(multiHMM), mode='full') |
|
133 |
+ ## Apply FDR on peaks ## |
|
134 |
+ for (i1 in 1:length(model$peaks)) { |
|
135 |
+ peaks <- model$peaks[[i1]] |
|
136 |
+ if (!invert) { |
|
137 |
+ model$peaks[[i1]] <- peaks[peaks$peakScores >= threshold[names(model$peaks)[i1]]] |
|
138 |
+ } else { |
|
139 |
+ model$peaks[[i1]] <- peaks[peaks$peakScores < threshold[names(model$peaks)[i1]]] |
|
140 |
+ } |
|
141 |
+ } |
|
142 |
+ } else { |
|
143 |
+ stop("Supply either a uniHMM, multiHMM or combinedMultiHMM object.") |
|
144 |
+ } |
|
145 |
+ ## Return model |
|
146 |
+ model$fdr <- fdr |
|
147 |
+ return(model) |
|
148 |
+ |
|
149 |
+ |
|
150 |
+ |
|
151 |
+} |
|
152 |
+ |
|
153 |
+ |
|
154 |
+changeFDR.univariate <- function(model, fdr, invert=FALSE) { |
|
155 |
+ |
|
156 |
+ fdr <- suppressWarnings( as.numeric(fdr) ) |
|
157 |
+ if (!is.numeric(fdr) | is.na(fdr)) { |
|
158 |
+ warning("Not changing false discovery rate because given 'fdr' is not numeric.") |
|
159 |
+ return(model) |
|
160 |
+ } else if (fdr < 0 | fdr > 1) { |
|
161 |
+ warning("fdr has to be inside the interval [0,1]. Nothing done.") |
|
162 |
+ return(model) |
|
163 |
+ } |
|
164 |
+ |
|
165 |
+ ## fdr threshold |
|
166 |
+ threshold <- 1-fdr |
|
167 |
+ |
|
168 |
+ if (is.null(model$bins$peakScores)) stop("Cannot recalculate states because column 'peakScores' is missing.") |
|
169 |
+ ## Calculate states |
|
170 |
+ ptm <- startTimedMessage("Calculating states from peakScores ...") |
|
171 |
+ states <- model$bins$state |
|
172 |
+ states[model$bins$state == 'modified'] <- 'unmodified' |
|
173 |
+ if (!invert) { |
|
174 |
+ states[ model$bins$peakScores >= threshold ] <- 'modified' |
|
175 |
+ } else { |
|
176 |
+ states[ model$bins$peakScores < threshold & model$bins$peakScores > 0 ] <- 'modified' |
|
177 |
+ } |
|
178 |
+ model$bins$state <- states |
|
179 |
+ stopTimedMessage(ptm) |
|
180 |
+ ## Redo segmentation |
|
181 |
+ ptm <- startTimedMessage("Making segmentation ...") |
|
182 |
+ gr <- model$bins |
|
183 |
+ df <- as.data.frame(gr) |
|
184 |
+ red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2average=c('score'), columns2drop=c('width',grep('posteriors', names(df), value=TRUE), 'counts'))) |
|
185 |
+ model$segments <- methods::as(red.df, 'GRanges') |
|
186 |
+ seqlengths(model$segments) <- seqlengths(model$bins)[seqlevels(model$segments)] |
|
187 |
+ ## Redo peaks |
|
188 |
+ model$peaks <- model$segments[model$segments$state == 'modified'] |
|
189 |
+ model$peaks$state <- NULL |
|
190 |
+ model$segments <- NULL |
|
191 |
+ ## Redo weights |
|
192 |
+ model$weights <- table(model$bins$state) / length(model$bins) |
|
193 |
+ stopTimedMessage(ptm) |
|
194 |
+ |
|
195 |
+ ## Return model |
|
196 |
+ model$fdr <- fdr |
|
197 |
+ return(model) |
|
198 |
+ |
|
199 |
+} |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#' Change the false discovery rate of a Hidden Markov Model |
|
1 |
+#' Change the posterior cutoff of a Hidden Markov Model |
|
2 | 2 |
#' |
3 | 3 |
#' Adjusts the peak calls of a \code{\link{uniHMM}} or \code{\link{multiHMM}} object with the given posterior cutoff. |
4 | 4 |
#' |
... | ... |
@@ -8,25 +8,24 @@ |
8 | 8 |
#' @param model A \code{\link{uniHMM}} or \code{\link{multiHMM}} object with posteriors. |
9 | 9 |
#' @param post.cutoff A vector of posterior cutoff values between 0 and 1 the same length as \code{ncol(model$bins$posteriors)}. If only one value is given, it will be reused for all columns. Values close to 1 will yield more stringent peak calls with lower false positive but higher false negative rate. |
10 | 10 |
#' @return The input object is returned with adjusted peak calls. |
11 |
-#' @export |
|
12 |
-#' @examples |
|
13 |
-#'## Get an example BAM file |
|
14 |
-#'file <- system.file("extdata", "euratrans", |
|
15 |
-#' "lv-H3K27me3-BN-male-bio2-tech1.bam", |
|
16 |
-#' package="chromstaRData") |
|
17 |
-#'## Bin the file into bin size 1000bp |
|
18 |
-#'data(rn4_chrominfo) |
|
19 |
-#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000, |
|
20 |
-#' chromosomes='chr12') |
|
21 |
-#'## Fit the univariate Hidden Markov Model |
|
22 |
-#'# !Keep posteriors to change the post.cutoff later! |
|
23 |
-#'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1, |
|
24 |
-#' keep.posteriors=TRUE) |
|
25 |
-#'## Compare fits with different post.cutoffs |
|
26 |
-#'plotHistogram(changePostCutoff(hmm, post.cutoff=0.01)) + ylim(0,0.25) |
|
27 |
-#'plotHistogram(hmm) + ylim(0,0.25) |
|
28 |
-#'plotHistogram(changePostCutoff(hmm, post.cutoff=0.99)) + ylim(0,0.25) |
|
29 |
-#' |
|
11 |
+# #' @examples |
|
12 |
+# #'## Get an example BAM file |
|
13 |
+# #'file <- system.file("extdata", "euratrans", |
|
14 |
+# #' "lv-H3K27me3-BN-male-bio2-tech1.bam", |
|
15 |
+# #' package="chromstaRData") |
|
16 |
+# #'## Bin the file into bin size 1000bp |
|
17 |
+# #'data(rn4_chrominfo) |
|
18 |
+# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000, |
|
19 |
+# #' chromosomes='chr12') |
|
20 |
+# #'## Fit the univariate Hidden Markov Model |
|
21 |
+# #'# !Keep posteriors to change the post.cutoff later! |
|
22 |
+# #'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1, |
|
23 |
+# #' keep.posteriors=TRUE) |
|
24 |
+# #'## Compare fits with different post.cutoffs |
|
25 |
+# #'plotHistogram(changePostCutoff(hmm, post.cutoff=0.01)) + ylim(0,0.25) |
|
26 |
+# #'plotHistogram(hmm) + ylim(0,0.25) |
|
27 |
+# #'plotHistogram(changePostCutoff(hmm, post.cutoff=0.99)) + ylim(0,0.25) |
|
28 |
+# #' |
|
30 | 29 |
changePostCutoff <- function(model, post.cutoff=0.5) { |
31 | 30 |
|
32 | 31 |
if (!is.numeric(post.cutoff)) { |
... | ... |
@@ -80,7 +79,7 @@ changePostCutoff.multivariate <- function(model, post.cutoff) { |
80 | 79 |
for (icol in 1:ncol(post)) { |
81 | 80 |
post.thresholded[,icol] <- post[,icol] >= threshold[icol] |
82 | 81 |
} |
83 |
- states <- factor(bin2dec(model$bins$posteriors >= threshold), levels=levels(model$bins$state)) |
|
82 |
+ states <- factor(bin2dec(post.thresholded), levels=levels(model$bins$state)) |
|
84 | 83 |
model$bins$state <- states |
85 | 84 |
## Combinations |
86 | 85 |
if (!is.null(model$bins$combination)) { |
... | ... |
@@ -90,6 +90,10 @@ combineMultivariates <- function(hmms, mode) { |
90 | 90 |
counts[[1]] <- hmm$bins$counts |
91 | 91 |
posteriors <- list() |
92 | 92 |
posteriors[[1]] <- hmm$bins$posteriors |
93 |
+ peakScores <- list() |
|
94 |
+ peakScores[[1]] <- hmm$bins$peakScores |
|
95 |
+ peaks <- list() |
|
96 |
+ peaks[[1]] <- hmm$peaks |
|
93 | 97 |
binstates <- list() |
94 | 98 |
binstates[[1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
95 | 99 |
stopTimedMessage(ptm) |
... | ... |
@@ -102,6 +106,8 @@ combineMultivariates <- function(hmms, mode) { |
102 | 106 |
combs[[i1]] <- hmm$bins$combination |
103 | 107 |
counts[[i1]] <- hmm$bins$counts |
104 | 108 |
posteriors[[i1]] <- hmm$bins$posteriors |
109 |
+ peakScores[[i1]] <- hmm$bins$peakScores |
|
110 |
+ peaks[[i1]] <- hmm$peaks |
|
105 | 111 |
binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
106 | 112 |
stopTimedMessage(ptm) |
107 | 113 |
} |
... | ... |
@@ -109,12 +115,13 @@ combineMultivariates <- function(hmms, mode) { |
109 | 115 |
ptm <- startTimedMessage("Concatenating conditions ...") |
110 | 116 |
counts <- do.call(cbind, counts) |
111 | 117 |
posteriors <- do.call(cbind, posteriors) |
118 |
+ peakScores <- do.call(cbind, peakScores) |
|
112 | 119 |
infos <- do.call(rbind, infos) |
113 | 120 |
conditions <- unique(infos$condition) |
114 | 121 |
states <- factor(bin2dec(do.call(cbind, binstates))) |
115 | 122 |
names(states) <- NULL |
116 | 123 |
names(combs) <- conditions |
117 |
- combs.df <- as(combs,'DataFrame') |
|
124 |
+ combs.df <- methods::as(combs,'DataFrame') |
|
118 | 125 |
stopTimedMessage(ptm) |
119 | 126 |
|
120 | 127 |
} else if (mode == 'differential') { |
... | ... |
@@ -122,6 +129,8 @@ combineMultivariates <- function(hmms, mode) { |
122 | 129 |
infos <- list() |
123 | 130 |
counts <- list() |
124 | 131 |
posteriors <- list() |
132 |
+ peakScores <- list() |
|
133 |
+ peaks <- list() |
|
125 | 134 |
binstates <- list() |
126 | 135 |
for (i1 in 1:length(hmms)) { |
127 | 136 |
ptm <- startTimedMessage("Processing HMM ",i1," ...") |
... | ... |
@@ -129,12 +138,15 @@ combineMultivariates <- function(hmms, mode) { |
129 | 138 |
infos[[i1]] <- hmm$info |
130 | 139 |
counts[[i1]] <- hmm$bins$counts |
131 | 140 |
posteriors[[i1]] <- hmm$bins$posteriors |
141 |
+ peakScores[[i1]] <- hmm$bins$peakScores |
|
142 |
+ peaks[[i1]] <- hmm$peaks |
|
132 | 143 |
binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
133 | 144 |
stopTimedMessage(ptm) |
134 | 145 |
} |
135 | 146 |
ptm <- startTimedMessage("Concatenating HMMs ...") |
136 | 147 |
counts <- do.call(cbind, counts) |
137 | 148 |
posteriors <- do.call(cbind, posteriors) |
149 |
+ peakScores <- do.call(cbind, peakScores) |
|
138 | 150 |
infos <- do.call(rbind, infos) |
139 | 151 |
conditions <- unique(infos$condition) |
140 | 152 |
binstates <- do.call(cbind, binstates) |
... | ... |
@@ -160,7 +172,7 @@ combineMultivariates <- function(hmms, mode) { |
160 | 172 |
combs[[condition]] <- mapping[as.character(states.cond)] |
161 | 173 |
} |
162 | 174 |
combs.df <- as.data.frame(combs) # get factors instead of characters |
163 |
- combs.df <- as(combs.df, 'DataFrame') |
|
175 |
+ combs.df <- methods::as(combs.df, 'DataFrame') |
|
164 | 176 |
stopTimedMessage(ptm) |
165 | 177 |
|
166 | 178 |
} else if (mode == 'full') { |
... | ... |
@@ -175,6 +187,8 @@ combineMultivariates <- function(hmms, mode) { |
175 | 187 |
conditions <- unique(infos$condition) |
176 | 188 |
counts <- hmm$bins$counts |
177 | 189 |
posteriors <- hmm$bins$posteriors |
190 |
+ peakScores <- hmm$bins$peakScores |
|
191 |
+ peaks <- hmm$peaks |
|
178 | 192 |
states <- hmm$bins$state |
179 | 193 |
combs <- list() |
180 | 194 |
for (condition in conditions) { |
... | ... |
@@ -186,7 +200,7 @@ combineMultivariates <- function(hmms, mode) { |
186 | 200 |
stopTimedMessage(ptm) |
187 | 201 |
} |
188 | 202 |
combs.df <- as.data.frame(combs) # get factors instead of characters |
189 |
- combs.df <- as(combs.df, 'DataFrame') |
|
203 |
+ combs.df <- methods::as(combs.df, 'DataFrame') |
|
190 | 204 |
|
191 | 205 |
} else if (mode == 'replicate') { |
192 | 206 |
## Load first HMM for coordinates |
... | ... |
@@ -201,6 +215,10 @@ combineMultivariates <- function(hmms, mode) { |
201 | 215 |
counts[[1]] <- hmm$bins$counts |
202 | 216 |
posteriors <- list() |
203 | 217 |
posteriors[[1]] <- hmm$bins$posteriors |
218 |
+ peakScores <- list() |
|
219 |
+ peakScores[[1]] <- hmm$bins$peakScores |
|
220 |
+ peaks <- list() |
|
221 |
+ peaks[[1]] <- hmm$peaks |
|
204 | 222 |
binstates <- list() |
205 | 223 |
binstates[[1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
206 | 224 |
stopTimedMessage(ptm) |
... | ... |
@@ -212,6 +230,8 @@ combineMultivariates <- function(hmms, mode) { |
212 | 230 |
infos[[i1]] <- hmm$info |
213 | 231 |
counts[[i1]] <- hmm$bins$counts |
214 | 232 |
posteriors[[i1]] <- hmm$bins$posteriors |
233 |
+ peakScores[[i1]] <- hmm$bins$peakScores |
|
234 |
+ peaks[[i1]] <- hmm$peaks |
|
215 | 235 |
binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
216 | 236 |
stopTimedMessage(ptm) |
217 | 237 |
} |
... | ... |
@@ -219,6 +239,7 @@ combineMultivariates <- function(hmms, mode) { |
219 | 239 |
ptm <- startTimedMessage("Concatenating mark-conditions ...") |
220 | 240 |
counts <- do.call(cbind, counts) |
221 | 241 |
posteriors <- do.call(cbind, posteriors) |
242 |
+ peakScores <- do.call(cbind, peakScores) |
|
222 | 243 |
binstates <- do.call(cbind, binstates) |
223 | 244 |
infos <- do.call(rbind, infos) |
224 | 245 |
conditions <- unique(infos$condition) |
... | ... |
@@ -236,7 +257,7 @@ combineMultivariates <- function(hmms, mode) { |
236 | 257 |
combs[[condition]] <- mapping[as.character(states)] |
237 | 258 |
} |
238 | 259 |
names(combs) <- conditions |
239 |
- combs.df <- as(combs,'DataFrame') |
|
260 |
+ combs.df <- methods::as(combs,'DataFrame') |
|
240 | 261 |
stopTimedMessage(ptm) |
241 | 262 |
} else { |
242 | 263 |
stop("Unknown mode '", mode, "'.") |
... | ... |
@@ -261,9 +282,10 @@ combineMultivariates <- function(hmms, mode) { |
261 | 282 |
## Transferring counts and posteriors |
262 | 283 |
bins$counts <- counts |
263 | 284 |
bins$posteriors <- posteriors |
285 |
+ bins$peakScores <- peakScores |
|
264 | 286 |
|
265 | 287 |
## Add differential score ## |
266 |
- bins$differential.score <- differentialScoreSum(bins$posteriors, infos) |
|
288 |
+ bins$differential.score <- differentialScoreSum(bins$peakScores, infos) |
|
267 | 289 |
|
268 | 290 |
### Redo the segmentation for all conditions combined |
269 | 291 |
ptm <- startTimedMessage("Redoing segmentation for all conditions combined ...") |
... | ... |
@@ -273,20 +295,23 @@ combineMultivariates <- function(hmms, mode) { |
273 | 295 |
|
274 | 296 |
### Redo the segmentation for each condition separately |
275 | 297 |
ptm <- startTimedMessage("Redoing segmentation for each condition separately ...") |
276 |
- segments.separate <- list() |
|
298 |
+ segments.per.condition <- list() |
|
277 | 299 |
for (cond in names(combs)) { |
278 | 300 |
bins.cond <- bins |
279 | 301 |
mcols(bins.cond) <- mcols(bins)[paste0('combination.',cond)] |
280 | 302 |
df <- as.data.frame(bins.cond) |
281 | 303 |
names(df)[6] <- cond |
282 | 304 |
segments.cond <- suppressMessages( collapseBins(df, column2collapseBy=cond, columns2drop=c('width', grep('posteriors', names(df), value=TRUE))) ) |
283 |
- segments.cond <- as(segments.cond, 'GRanges') |
|
305 |
+ segments.cond <- methods::as(segments.cond, 'GRanges') |
|
284 | 306 |
names(mcols(segments.cond)) <- 'combination' |
285 | 307 |
seqlengths(segments.cond) <- seqlengths(bins)[seqlevels(segments.cond)] |
286 |
- segments.separate[[cond]] <- segments.cond |
|
308 |
+ segments.per.condition[[cond]] <- segments.cond |
|
287 | 309 |
} |
288 | 310 |
stopTimedMessage(ptm) |
289 | 311 |
|
312 |
+ ### Flatten peak list ### |
|
313 |
+ peaks <- unlist(peaks) |
|
314 |
+ |
|
290 | 315 |
### Make return object |
291 | 316 |
hmm <- list() |
292 | 317 |
class(hmm) <- class.combined.multivariate.hmm |
... | ... |
@@ -294,7 +319,8 @@ combineMultivariates <- function(hmms, mode) { |
294 | 319 |
rownames(hmm$info) <- NULL |
295 | 320 |
hmm$bins <- bins |
296 | 321 |
hmm$segments <- segments |
297 |
- hmm$segments.separate <- segments.separate |
|
322 |
+ hmm$segments.per.condition <- segments.per.condition |
|
323 |
+ hmm$peaks <- peaks |
|
298 | 324 |
hmm$frequencies <- freqs$table |
299 | 325 |
return(hmm) |
300 | 326 |
|
... | ... |
@@ -41,6 +41,19 @@ NULL |
41 | 41 |
NULL |
42 | 42 |
|
43 | 43 |
|
44 |
+#' Gene coordinates for rn4 |
|
45 |
+#' |
|
46 |
+#' A data.frame containing gene coordinates and biotypes of the rn4 assembly. |
|
47 |
+#' |
|
48 |
+#' @docType data |
|
49 |
+#' @name genes_rn4 |
|
50 |
+#' @format A data.frame. |
|
51 |
+#' @examples |
|
52 |
+#'data(genes_rn4) |
|
53 |
+#'head(genes_rn4) |
|
54 |
+NULL |
|
55 |
+ |
|
56 |
+ |
|
44 | 57 |
#' chromstaR objects |
45 | 58 |
#' |
46 | 59 |
#' @description |
... | ... |
@@ -136,7 +149,7 @@ NULL |
136 | 149 |
#' A \code{list()} with the following entries: |
137 | 150 |
#' \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing genomic bin coordinates and human readable combinations for the combined \code{\link{multiHMM}} objects.} |
138 | 151 |
#' \item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.} |
139 |
-#' \item{segments.separate}{A \code{list} with segments for each condition separately.} |
|
152 |
+#' \item{segments.per.condition}{A \code{list} with segments for each condition separately.} |
|
140 | 153 |
#' @seealso \code{\link{combineMultivariates}}, \code{\link{uniHMM}}, \code{\link{multiHMM}} |
141 | 154 |
#' @name combinedMultiHMM |
142 | 155 |
#' @aliases combinedHMM |
... | ... |
@@ -18,7 +18,7 @@ |
18 | 18 |
#' \describe{ |
19 | 19 |
#' \item{\code{\link{heatmapCountCorrelation}}}{Heatmap of read count correlations.} |
20 | 20 |
#' \item{\code{\link{plotEnrichCountHeatmap}}}{Heatmap of read counts around annotation.} |
21 |
-#' \item{\code{\link{plotFoldEnrichment}}}{Enrichment of combinatorial states around annotation.} |
|
21 |
+#' \item{\code{\link{plotEnrichment}}}{Enrichment of combinatorial states around annotation.} |
|
22 | 22 |
#' \item{\code{\link{plotFoldEnrichHeatmap}}}{Enrichment of combinatorial states at multiple annotations.} |
23 | 23 |
#' \item{\code{\link{plotExpression}}}{Boxplot of expression values that overlap combinatorial states.} |
24 | 24 |
#' } |
... | ... |
@@ -32,17 +32,17 @@ |
32 | 32 |
#'### Make the enrichment plots ### |
33 | 33 |
#'# We expect promoter [H3K4me3] and bivalent-promoter signatures [H3K4me3+H3K27me3] |
34 | 34 |
#'# to be enriched at transcription start sites. |
35 |
-#' plotFoldEnrichment(hmm = model, annotation = genes, bp.around.annotation = 15000) + |
|
35 |
+#' plotEnrichment(hmm = model, annotation = genes, bp.around.annotation = 15000) + |
|
36 | 36 |
#' ggtitle('Fold enrichment around genes') + |
37 | 37 |
#' xlab('distance from gene body') |
38 | 38 |
#' |
39 | 39 |
#'# Plot enrichment only at TSS. We make use of the fact that TSS is the start of a gene. |
40 |
-#' plotFoldEnrichment(model, genes, region = 'start') + |
|
40 |
+#' plotEnrichment(model, genes, region = 'start') + |
|
41 | 41 |
#' ggtitle('Fold enrichment around TSS') + |
42 | 42 |
#' xlab('distance from TSS in [bp]') |
43 | 43 |
#'# Note: If you want to facet the plot because you have many combinatorial states you |
44 | 44 |
#'# can do that with |
45 |
-#' plotFoldEnrichment(model, genes, region = 'start') + |
|
45 |
+#' plotEnrichment(model, genes, region = 'start') + |
|
46 | 46 |
#' facet_wrap(~ combination) |
47 | 47 |
#' |
48 | 48 |
#'# Another form of visualization that shows every TSS in a heatmap |
... | ... |
@@ -65,6 +65,7 @@ NULL |
65 | 65 |
#' @param annotations A \code{list()} with \code{\link{GRanges}} objects containing coordinates of multiple annotations The names of the list entries will be used to name the return values. |
66 | 66 |
#' @param plot A logical indicating whether the plot or an array with the fold enrichment values is returned. |
67 | 67 |
#' @importFrom S4Vectors subjectHits queryHits |
68 |
+#' @importFrom IRanges subsetByOverlaps |
|
68 | 69 |
#' @importFrom reshape2 melt |
69 | 70 |
#' @export |
70 | 71 |
plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combinations=NULL, marks=NULL, plot=TRUE) { |
... | ... |
@@ -89,7 +90,8 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina |
89 | 90 |
mark.levels <- marks |
90 | 91 |
} |
91 | 92 |
genome <- sum(as.numeric(width(bins))) |
92 |
- feature.lengths <- lapply(annotations, function(x) { sum(as.numeric(width(x))) }) |
|
93 |
+ annotationsAtBins <- lapply(annotations, function(x) { IRanges::subsetByOverlaps(x, bins) }) |
|
94 |
+ feature.lengths <- lapply(annotationsAtBins, function(x) { sum(as.numeric(width(x))) }) |
|
93 | 95 |
|
94 | 96 |
if (what == 'peaks') { |
95 | 97 |
binstates <- dec2bin(bins$state, colnames=hmm$info$ID) |
... | ... |
@@ -98,16 +100,17 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina |
98 | 100 |
## Fold enrichment |
99 | 101 |
ggplts <- list() |
100 | 102 |
folds <- list() |
103 |
+ maxfolds <- list() |
|
101 | 104 |
for (condition in conditions) { |
102 | 105 |
if (what == 'combinations') { |
103 | 106 |
bins$combination <- mcols(bins)[,paste0('combination.', condition)] |
104 |
- fold <- array(NA, dim=c(length(annotations), length(comb.levels)), dimnames=list(annotation=names(annotations), combination=comb.levels)) |
|
107 |
+ fold <- array(NA, dim=c(length(annotationsAtBins), length(comb.levels)), dimnames=list(annotation=names(annotationsAtBins), combination=comb.levels)) |
|
105 | 108 |
for (icomb in 1:length(comb.levels)) { |
106 | 109 |
mask <- bins$combination == comb.levels[icomb] |
107 | 110 |
bins.mask <- bins[mask] |
108 | 111 |
combstate.length <- sum(as.numeric(width(bins.mask))) |
109 |
- for (ifeat in 1:length(annotations)) { |
|
110 |
- feature <- annotations[[ifeat]] |
|
112 |
+ for (ifeat in 1:length(annotationsAtBins)) { |
|
113 |
+ feature <- annotationsAtBins[[ifeat]] |
|
111 | 114 |
ind <- findOverlaps(bins.mask, feature) |
112 | 115 |
|
113 | 116 |
binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))] |
... | ... |
@@ -121,7 +124,7 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina |
121 | 124 |
} |
122 | 125 |
|
123 | 126 |
} else if (what == 'peaks') { |
124 |
- fold <- array(NA, dim=c(length(annotations), length(mark.levels)), dimnames=list(annotation=names(annotations), mark=mark.levels)) |
|
127 |
+ fold <- array(NA, dim=c(length(annotationsAtBins), length(mark.levels)), dimnames=list(annotation=names(annotationsAtBins), mark=mark.levels)) |
|
125 | 128 |
for (imark in 1:length(mark.levels)) { |
126 | 129 |
mark <- mark.levels[imark] |
127 | 130 |
colmask <- hmm$info$mark == mark |
... | ... |
@@ -133,8 +136,8 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina |
133 | 136 |
mask <- binstates.cond |
134 | 137 |
bins.mask <- bins[mask] |
135 | 138 |
combstate.length <- sum(as.numeric(width(bins.mask))) |
136 |
- for (ifeat in 1:length(annotations)) { |
|
137 |
- feature <- annotations[[ifeat]] |
|
139 |
+ for (ifeat in 1:length(annotationsAtBins)) { |
|
140 |
+ feature <- annotationsAtBins[[ifeat]] |
|
138 | 141 |
ind <- findOverlaps(bins.mask, feature) |
139 | 142 |
|
140 | 143 |
binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))] |
... | ... |
@@ -151,16 +154,22 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina |
151 | 154 |
|
152 | 155 |
if (plot) { |
153 | 156 |
df <- reshape2::melt(fold, value.name='foldEnrichment') |
157 |
+ maxfolds[[condition]] <- max(df$foldEnrichment, na.rm=TRUE) |
|
154 | 158 |
if (what == 'combinations') { |
155 |
- ggplt <- ggplot(df) + geom_tile(aes_string(x='combination', y='annotation', fill='foldEnrichment')) + theme(axis.text.x = element_text(angle=90, hjust=1)) + scale_fill_gradient(low='white', high='blue') |
|
159 |
+ ggplt <- ggplot(df) + geom_tile(aes_string(x='combination', y='annotation', fill='foldEnrichment')) |
|
156 | 160 |
} else if (what == 'peaks') { |
157 |
- ggplt <- ggplot(df) + geom_tile(aes_string(x='mark', y='annotation', fill='foldEnrichment')) + theme(axis.text.x = element_text(angle=90, hjust=1)) + scale_fill_gradient(low='white', high='blue') |
|
161 |
+ ggplt <- ggplot(df) + geom_tile(aes_string(x='mark', y='annotation', fill='foldEnrichment')) |
|
158 | 162 |
} |
163 |
+ ggplt <- ggplt + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5)) |
|
159 | 164 |
ggplts[[condition]] <- ggplt |
160 | 165 |
} else { |
161 | 166 |
folds[[condition]] <- fold |
162 | 167 |
} |
163 | 168 |
} |
169 |
+ if (plot) { |
|
170 |
+ maxfolds <- unlist(maxfolds) |
|
171 |
+ ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_fill_gradientn(colors=grDevices::colorRampPalette(c("blue","white","red"))(20), values=c(seq(0,1,length.out=10), seq(1,max(maxfolds, na.rm=TRUE),length.out=10)), rescaler=function(x,...) {x}, oob=identity, limits=c(0,max(maxfolds, na.rm=TRUE))) }) |
|
172 |
+ } |
|
164 | 173 |
|
165 | 174 |
if (class(hmm) == class.multivariate.hmm) { |
166 | 175 |
if (plot) { |
... | ... |
@@ -183,6 +192,7 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina |
183 | 192 |
#' @inheritParams enrichmentAtAnnotation |
184 | 193 |
#' @param max.rows An integer specifying the number of randomly subsampled rows that are plotted from the \code{annotation} object. This is necessary to avoid crashing for heatmaps with too many rows. |
185 | 194 |
#' @importFrom reshape2 melt |
195 |
+#' @importFrom IRanges subsetByOverlaps |
|
186 | 196 |
#' @export |
187 | 197 |
plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, max.rows=1000) { |
188 | 198 |
|
... | ... |
@@ -190,24 +200,27 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, |
190 | 200 |
## Variables |
191 | 201 |
bins <- hmm$bins |
192 | 202 |
if (class(hmm) == class.combined.multivariate.hmm) { |
203 |
+ conditions <- sub('combination.', '', grep('combination', names(mcols(bins)), value=TRUE)) |
|
204 |
+ comb.levels <- levels(mcols(bins)[,paste0('combination.', conditions[1])]) |
|
205 |
+ ## Create new column combination with all conditions combined |
|
206 |
+ combinations <- list() |
|
207 |
+ for (condition in conditions) { |
|
208 |
+ combinations[[condition]] <- paste0(condition, ":", mcols(bins)[,paste0('combination.', condition)]) |
|
209 |
+ } |
|
210 |
+ combinations$sep <- ', ' |
|
211 |
+ bins$combination <- factor(do.call(paste, combinations)) |
|
193 | 212 |
} else if (class(hmm) == class.multivariate.hmm) { |
194 |
- # Rename 'combination' to 'combination.' for coherence with combinedMultiHMM |
|
195 |
- names(mcols(bins))[grep('combination', names(mcols(bins)))] <- 'combination.' |
|
213 |
+ comb.levels <- levels(bins$combination) |
|
196 | 214 |
} |
197 |
- conditions <- sub('combination.', '', grep('combination', names(mcols(bins)), value=TRUE)) |
|
198 |
- comb.levels <- levels(mcols(bins)[,paste0('combination.', conditions[1])]) |
|
199 | 215 |
binsize <- width(bins)[1] |
200 | 216 |
around <- round(bp.around.annotation/binsize) |
201 |
- ## Create new column combination with all conditions combined |
|
202 |
- combinations <- list() |
|
203 |
- for (condition in conditions) { |
|
204 |
- combinations[[condition]] <- paste0(condition, ":", mcols(bins)[,paste0('combination.', condition)]) |
|
205 |
- } |
|
206 |
- combinations$sep <- ', ' |
|
207 |
- bins$combination <- factor(do.call(paste, combinations)) |
|
217 |
+ |
|
218 |
+ ## Get RPKM values |
|
219 |
+ bins$counts <- sweep(bins$counts, MARGIN = 2, STATS = colSums(bins$counts), FUN = '/') |
|
220 |
+ bins$counts <- bins$counts * 1e6 * 1000/mean(width(bins)) |
|
208 | 221 |
|
209 | 222 |
# Subsampling for plotting of huge data.frames |
210 |
- annotation <- subsetByOverlaps(annotation, bins) |
|
223 |
+ annotation <- IRanges::subsetByOverlaps(annotation, bins) |
|
211 | 224 |
if (length(annotation)>max.rows) { |
212 | 225 |
annotation <- sample(annotation, size=max.rows, replace=FALSE) |
213 | 226 |
} |
... | ... |
@@ -262,7 +275,9 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, |
262 | 275 |
panel.grid = element_blank(), |
263 | 276 |
panel.border = element_rect(fill='NA'), |
264 | 277 |
panel.background = element_rect(fill='white'), |
265 |
- axis.text.y = element_blank() |
|
278 |
+ axis.text.y = element_blank(), |
|
279 |
+ axis.ticks.y = element_blank(), |
|
280 |
+ axis.line.y = element_blank() |
|
266 | 281 |
) |
267 | 282 |
|
268 | 283 |
## Prepare data.frame |
... | ... |
@@ -272,18 +287,18 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, |
272 | 287 |
comb2keep <- names(num.comb)[num.comb/sum(num.comb) > 0.005] |
273 | 288 |
counts <- counts[comb2keep] |
274 | 289 |
df <- reshape2::melt(counts) |
275 |
- names(df) <- c('id','position','counts','track','combination') |
|
290 |
+ names(df) <- c('id','position','RPKM','track','combination') |
|
276 | 291 |
df$id <- factor(df$id, levels=rev(unique(df$id))) |
277 | 292 |
df$combination <- factor(df$combination, levels=unique(df$combination)) |
278 | 293 |
df$track <- factor(df$track, levels=colnames(bins$counts)) |
279 | 294 |
|
280 | 295 |
## Plot as heatmap |
281 | 296 |
ggplt <- ggplot(df) + geom_tile(aes_string(x='position', y='id', color='combination')) |
282 |
- ggplt <- ggplt + geom_tile(aes_string(x='position', y='id', fill='counts'), alpha=0.6) |
|
297 |
+ ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$combination)))) |
|
298 |
+ ggplt <- ggplt + geom_tile(aes_string(x='position', y='id', fill='RPKM'), alpha=0.6) |
|
283 | 299 |
ggplt <- ggplt + facet_wrap( ~ track, nrow=1) + custom_theme |
284 | 300 |
ggplt <- ggplt + xlab('distance from annotation in [bp]') + ylab('') |
285 |
- breaks <- sort(c(0, 10^(0:5), max(df$counts, na.rm = TRUE))) |
|
286 |
- ggplt <- ggplt + scale_fill_continuous(trans='log1p', breaks=breaks, labels=breaks, low='white', high='black') |
|
301 |
+ ggplt <- ggplt + scale_fill_continuous(trans='log1p', low='white', high='black') |
|
287 | 302 |
# Insert horizontal lines |
288 | 303 |
y.lines <- sapply(split(df$id, df$combination), function(x) { max(as.integer(x)) }) |
289 | 304 |
df.lines <- data.frame(y=sort(y.lines[-1]) + 0.5) |
... | ... |
@@ -300,15 +315,19 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, |
300 | 315 |
#' @inheritParams enrichmentAtAnnotation |
301 | 316 |
#' @importFrom reshape2 melt |
302 | 317 |
#' @export |
303 |
-plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c("start","inside","end"), num.intervals=20, what='combinations', combinations=NULL, marks=NULL) { |
|
318 |
+plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c("start","inside","end"), num.intervals=20, what='combinations', combinations=NULL, marks=NULL, statistic='fold') { |
|
304 | 319 |
|
305 | 320 |
## Check user input |
306 | 321 |
if ((!what %in% c('combinations','peaks','counts')) | length(what) > 1) { |
307 | 322 |
stop("argument 'what' must be one of c('combinations','peaks','counts')") |
308 | 323 |
} |
324 |
+ if (!is.null(marks) & what!='peaks') { |
|
325 |
+ stop("Please set argument 'what=\"peaks\"' if you want to plot marks instead of combinations.") |
|
326 |
+ } |
|
309 | 327 |
|
310 | 328 |
## Variables |
311 | 329 |
hmm <- loadHmmsFromFiles(hmm, check.class=c(class.multivariate.hmm, class.combined.multivariate.hmm))[[1]] |
330 |
+ hmm$bins$counts <- rpkm.matrix(hmm$bins$counts, binsize=mean(width(hmm$bins))) |
|
312 | 331 |
bins <- hmm$bins |
313 | 332 |
if (class(hmm) == class.combined.multivariate.hmm) { |
314 | 333 |
} else if (class(hmm) == class.multivariate.hmm) { |
... | ... |
@@ -329,14 +348,15 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi |
329 | 348 |
|
330 | 349 |
if (what %in% c('peaks','counts')) { |
331 | 350 |
### Get fold enrichment |
332 |
- enrich <- enrichmentAtAnnotation(hmm$bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals) |
|
351 |
+ enrich <- enrichmentAtAnnotation(hmm$bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals, statistic=statistic) |
|
333 | 352 |
} |
334 | 353 |
ggplts <- list() |
354 |
+ maxfolds <- list() |
|
335 | 355 |
for (condition in conditions) { |
336 | 356 |
if (what == 'combinations') { |
337 | 357 |
### Get fold enrichment |
338 | 358 |
bins$combination <- mcols(bins)[,paste0('combination.', condition)] |
339 |
- enrich.cond <- enrichmentAtAnnotation(bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals) |
|
359 |
+ enrich.cond <- enrichmentAtAnnotation(bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals, statistic=statistic) |
|
340 | 360 |
} else { |
341 | 361 |
enrich.cond <- enrich |
342 | 362 |
} |
... | ... |
@@ -344,15 +364,21 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi |
344 | 364 |
df <- reshape2::melt(enrich.cond) |
345 | 365 |
df$L1 <- factor(df$L1, levels=c('start','inside','end')) |
346 | 366 |
df <- rbind(df[df$L1 == 'start',], df[df$L1 == 'inside',], df[df$L1 == 'end',]) |
347 |
- if (length(region)>=2) { |
|
367 |
+ if (length(region)>=2 & 'inside' %in% region) { |
|
348 | 368 |
df <- df[!(df$L1 == 'start' & df$lag > 0),] |
349 | 369 |
df <- df[!(df$L1 == 'end' & df$lag < 0),] |
350 | 370 |
df$position <- apply(data.frame(df$interval, df$lag), 1, max, na.rm = TRUE) |
371 |
+ } else if (length(region)==2 & ! 'inside' %in% region) { |
|
372 |
+ df <- df[!(df$L1 == 'start' & df$lag > 0),] |
|
373 |
+ df <- df[!(df$L1 == 'end' & df$lag < 0),] |
|
374 |
+ df$position <- df$lag |
|
351 | 375 |
} else if (length(region)==1) { |
352 | 376 |
df <- df[df$L1 == region,] |
353 | 377 |
df$position <- df$lag |
354 | 378 |
} |
355 |
- df$position[df$L1 == 'end'] <- df$position[df$L1 == 'end'] + bp.around.annotation |
|
379 |
+ if ('inside' %in% region) { |
|
380 |
+ df$position[df$L1 == 'end'] <- df$position[df$L1 == 'end'] + bp.around.annotation |
|
381 |
+ } |
|
356 | 382 |
df$position[df$L1 == 'inside'] <- df$position[df$L1 == 'inside'] * bp.around.annotation |
357 | 383 |
if (what == 'combinations') { |
358 | 384 |
df <- df[df$combination %in% comb.levels,] |
... | ... |
@@ -367,20 +393,42 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi |
367 | 393 |
|
368 | 394 |
### Plot |
369 | 395 |
if (what == 'combinations') { |
370 |
- ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='combination'), size=2) + ylab('fold enrichment') |
|
396 |
+ ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='combination'), size=2) |
|
397 |
+ if (statistic == 'fold') { |
|
398 |
+ ggplt <- ggplt + ylab('fold enrichment') |
|
399 |
+ ggplt <- ggplt + geom_hline(yintercept=1, lty=2) |
|
400 |
+ } else if (statistic == 'fraction') { |
|
401 |
+ ggplt <- ggplt + ylab('fraction') |
|
402 |
+ } |
|
403 |
+ ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$combination)))) |
|
371 | 404 |
} else if (what == 'peaks') { |
372 |
- ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='mark'), size=2) + ylab('fraction of positions in peak') |
|
405 |
+ ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='mark'), size=2) |
|
406 |
+ if (statistic == 'fold') { |
|
407 |
+ ggplt <- ggplt + ylab('fold enrichment') |
|
408 |
+ ggplt <- ggplt + geom_hline(yintercept=1, lty=2) |
|
409 |
+ } else if (statistic == 'fraction') { |
|
410 |
+ ggplt <- ggplt + ylab('fraction') |
|
411 |
+ } |
|
412 |
+ ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$mark)))) |
|
373 | 413 |
} else if (what == 'counts') { |
374 |
- ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='track'), size=2) + ylab('read count') |
|
414 |
+ ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='track'), size=2) + ylab('RPKM') |
|
415 |
+ ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$track)))) |
|
375 | 416 |
} |
376 | 417 |
ggplt <- ggplt + theme_bw() + xlab('distance from annotation in [bp]') |
377 |
- if (length(region)>=2) { |
|
418 |
+ if (length(region)>=2 & 'inside' %in% region) { |
|
378 | 419 |
breaks <- c(c(-1, -0.5, 0, 0.5, 1, 1.5, 2) * bp.around.annotation) |
379 | 420 |
labels <- c(-bp.around.annotation, -bp.around.annotation/2, '0%', '50%', '100%', bp.around.annotation/2, bp.around.annotation) |
380 | 421 |
ggplt <- ggplt + scale_x_continuous(breaks=breaks, labels=labels) |
381 | 422 |
} |
423 |
+ maxfolds[[condition]] <- max(df$value, na.rm=TRUE) |
|
382 | 424 |
ggplts[[condition]] <- ggplt |
383 | 425 |
} |
426 |
+ maxfolds <- unlist(maxfolds) |
|
427 |
+ if (statistic == 'fraction' & what %in% c('combinations','peaks')) { |
|
428 |
+ ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_y_continuous(limits=c(0,1)) }) |
|
429 |
+ } else { |
|
430 |
+ ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_y_continuous(limits=c(0,max(maxfolds, na.rm=TRUE)*1.1)) }) |
|
431 |
+ } |
|
384 | 432 |
if (class(hmm) == class.multivariate.hmm) { |
385 | 433 |
return(ggplts[[1]]) |
386 | 434 |
} else if (class(hmm) == class.combined.multivariate.hmm) { |
... | ... |
@@ -402,9 +450,10 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi |
402 | 450 |
#' @param region A combination of \code{c('start','inside','end')} specifying the region of the annotation for which the enrichment will be calculated. Select \code{'start'} if you have a point-sized annotation like transcription start sites. Select \code{c('start','inside','end')} if you have long annotations like genes. |
403 | 451 |
#' @param what One of \code{c('combinations','peaks','counts')} specifying which statistic to calculate. |
404 | 452 |
#' @param num.intervals Number of intervals for enrichment 'inside' of annotation. |
453 |
+#' @param statistic The statistic to calculate. Either 'fold' for fold enrichments or 'fraction' for fraction of bins falling into the annotation. |
|
405 | 454 |
#' @return A \code{list()} containing \code{data.frame()}s for enrichment of combinatorial states and binary states at the start, end and inside of the annotation. |
406 | 455 |
#' @importFrom S4Vectors as.factor subjectHits queryHits |
407 |
-enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=10000, region=c('start','inside','end'), what=c('combinations','peaks','counts'), num.intervals=21) { |
|
456 |
+enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=10000, region=c('start','inside','end'), what=c('combinations','peaks','counts'), num.intervals=21, statistic='fold') { |
|
408 | 457 |
|
409 | 458 |
## Check user input |
410 | 459 |
if ((!what %in% c('combinations','peaks','counts')) | length(what) > 1) { |
... | ... |
@@ -427,6 +476,7 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation= |
427 | 476 |
binstates <- dec2bin(bins$state, colnames=info$ID) |
428 | 477 |
# Remove replicates |
429 | 478 |
binstates <- binstates[ ,info.dedup$ID] |
479 |
+ colsums.binstates <- colSums(binstates) |
|
430 | 480 |
} |
431 | 481 |
if ('counts' %in% what) { |
432 | 482 |
counts <- bins$counts |
... | ... |
@@ -455,14 +505,25 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation= |
455 | 505 |
index.inside.minus <- index.inside.minus[!is.na(index.inside.minus)] |
456 | 506 |
index <- c(index.inside.plus, index.inside.minus) |
457 | 507 |
index <- index[index>0 & index<=length(bins)] # index could cross chromosome boundaries, but we risk it |
458 |
- if ('peaks' %in% what) binstates.inside[as.character(interval),] <- colMeans(binstates[index,]) |
|
508 |
+ if ('peaks' %in% what) { |
|
509 |
+ if (statistic == 'fraction') { |
|
510 |
+ binstates.inside[as.character(interval),] <- colSums(binstates[index,]) / length(index) # or colMeans |
|
511 |
+ } else if (statistic == 'fold') { |
|
512 |
+ binstates.inside[as.character(interval),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins) |
|
513 |
+ } |
|
514 |
+ } |
|
459 | 515 |
if ('combinations' %in% what) { |
460 |
- fold <- table(combinations[index]) / tcombinations / length(annotation) * length(bins) # fold enrichment |
|
461 |
-# fold <- table(combinations[index]) / length(annotation) # percentage enrichment |
|
516 |
+ if (statistic == 'fraction') { |
|
517 |
+ fold <- table(combinations[index]) / length(index) |
|
518 |
+ } else if (statistic == 'fold') { |
|
519 |
+ fold <- table(combinations[index]) / length(index) / tcombinations * length(bins) # fold enrichment |
|
520 |
+ } |
|
462 | 521 |
fold[is.na(fold)] <- 0 |
463 | 522 |
combinations.inside[as.character(interval),] <- fold |
464 | 523 |
} |
465 |
- if ('counts' %in% what) counts.inside[as.character(interval),] <- colMeans(counts[index,]) |
|
524 |
+ if ('counts' %in% what) { |
|
525 |
+ counts.inside[as.character(interval),] <- colMeans(counts[index,]) |
|
526 |
+ } |
|
466 | 527 |
} |
467 | 528 |
if ('peaks' %in% what) { |
468 | 529 |
enrich$peaks$inside <- binstates.inside |
... | ... |
@@ -491,14 +552,25 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation= |
491 | 552 |
for (ilag in -lag:lag) { |
492 | 553 |
index <- c(index.start.plus+ilag, index.start.minus-ilag) |
493 | 554 |
index <- index[index>0 & index<=length(bins)] |
494 |
- if ('peaks' %in% what) binstates.start[as.character(ilag),] <- colMeans(binstates[index,]) |
|
555 |
+ if ('peaks' %in% what) { |
|
556 |
+ if (statistic == 'fraction') { |
|
557 |
+ binstates.start[as.character(ilag),] <- colSums(binstates[index,]) / length(index) |
|
558 |
+ } else if (statistic == 'fold') { |
|
559 |
+ binstates.start[as.character(ilag),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins) |
|
560 |
+ } |
|
561 |
+ } |
|
495 | 562 |
if ('combinations' %in% what) { |
496 |
- fold <- table(combinations[index]) / tcombinations / length(annotation) * length(bins) # fold enrichment |
|
497 |
-# fold <- table(combinations[index]) / length(annotation) # percentage enrichment |
|
563 |
+ if (statistic == 'fraction') { |
|
564 |
+ fold <- table(combinations[index]) / length(index) |
|
565 |
+ } else if (statistic == 'fold') { |
|
566 |
+ fold <- table(combinations[index]) / length(index) / tcombinations * length(bins) # fold enrichment |
|
567 |
+ } |
|
498 | 568 |
fold[is.na(fold)] <- 0 |
499 | 569 |
combinations.start[as.character(ilag),] <- fold |
500 | 570 |
} |
501 |
- if ('counts' %in% what) counts.start[as.character(ilag),] <- colMeans(counts[index,]) |
|
571 |
+ if ('counts' %in% what) { |
|
572 |
+ counts.start[as.character(ilag),] <- colMeans(counts[index,]) |
|
573 |
+ } |
|
502 | 574 |
} |
503 | 575 |
if ('peaks' %in% what) { |
504 | 576 |
rownames(binstates.start) <- as.numeric(rownames(binstates.start)) * binsize |
... | ... |
@@ -530,14 +602,25 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation= |
530 | 602 |
for (ilag in -lag:lag) { |
531 | 603 |
index <- c(index.end.plus+ilag, index.end.minus-ilag) |
532 | 604 |
index <- index[index>0 & index<=length(bins)] |
533 |
- if ('peaks' %in% what) binstates.end[as.character(ilag),] <- colMeans(binstates[index,]) |
|
605 |
+ if ('peaks' %in% what) { |
|
606 |
+ if (statistic == 'fraction') { |
|
607 |
+ binstates.end[as.character(ilag),] <- colSums(binstates[index,]) / length(index) |
|
608 |
+ } else if (statistic == 'fold') { |
|
609 |
+ binstates.end[as.character(ilag),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins) |
|
610 |
+ } |
|
611 |
+ } |
|
534 | 612 |
if ('combinations' %in% what) { |
535 |
- fold <- table(combinations[index]) / tcombinations / length(annotation) * length(bins) # fold enrichment |
|
536 |
-# fold <- table(combinations[index]) / length(annotation) # percentage enrichment |
|
613 |
+ if (statistic == 'fraction') { |
|
614 |
+ fold <- table(combinations[index]) / length(index) |
|
615 |
+ } else if (statistic == 'fold') { |
|
616 |
+ fold <- table(combinations[index]) / length(index) / tcombinations * length(bins) # fold enrichment |
|
617 |
+ } |
|
537 | 618 |
fold[is.na(fold)] <- 0 |
538 | 619 |
combinations.end[as.character(ilag),] <- fold |
539 | 620 |
} |
540 |
- if ('counts' %in% what) counts.end[as.character(ilag),] <- colMeans(counts[index,]) |
|
621 |
+ if ('counts' %in% what) { |
|
622 |
+ counts.end[as.character(ilag),] <- colMeans(counts[index,]) |
|
623 |
+ } |
|
541 | 624 |
} |
542 | 625 |
if ('peaks' %in% what) { |
543 | 626 |
rownames(binstates.end) <- as.numeric(rownames(binstates.end)) * binsize |
... | ... |
@@ -14,35 +14,35 @@ insertchr <- function(gr) { |
14 | 14 |
#===================================================== |
15 | 15 |
# Export binned data |
16 | 16 |
#===================================================== |
17 |
-#' Export genome browser viewable files |
|
18 |
-#' |
|
19 |
-#' Export read counts as genome browser viewable file |
|
20 |
-#' |
|
21 |
-#' Export read counts from \code{\link{binned.data}} as a file which can be uploaded into a genome browser. Read counts are exported in WIGGLE format (.wig.gz). |
|
22 |
-#' |
|
23 |
-#' @author Aaron Taudt |
|
24 |
-#' @param binned.data.list A \code{list()} of \code{\link{binned.data}} objects or vector of files that contain such objects. |
|
25 |
-#' @param filename The name of the file that will be written. The ending ".wig.gz" for read counts will be appended. Any existing file will be overwritten. |
|
26 |
-#' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}). |
|
27 |
-#' @param separate.files A logical indicating whether or not to produce separate files for each object in \code{binned.data.list}. |
|
28 |
-#' @return \code{NULL} |
|
29 |
-#' @seealso \code{\link{exportUnivariates}}, \code{\link{exportMultivariate}} |
|
30 |
-#' @importFrom utils write.table |
|
31 |
-#' @importFrom grDevices col2rgb |
|
32 |
-#' @export |
|
33 |
-#' @examples |
|
34 |
-#'## Get an example BAM file |
|
35 |
-#'file <- system.file("extdata", "euratrans", |
|
36 |
-#' "lv-H3K27me3-BN-male-bio2-tech1.bam", |
|
37 |
-#' package="chromstaRData") |
|
38 |
-#'## Bin the file into bin size 1000bp |
|
39 |
-#'data(rn4_chrominfo) |
|
40 |
-#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000, |
|
41 |
-#' chromosomes='chr12') |
|
42 |
-#'## Export the binned read counts |
|
43 |
-#'exportBinnedData(list(binned), filename=tempfile()) |
|
44 |
-#' |
|
45 |
-exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.files=TRUE) { |
|
17 |
+# #' Export genome browser viewable files |
|
18 |
+# #' |
|
19 |
+# #' Export read counts (RPKM) as genome browser viewable file |
|
20 |
+# #' |
|
21 |
+# #' Export read counts from \code{\link{binned.data}} as a file which can be uploaded into a genome browser. Read counts are exported in WIGGLE format as RPKM (.wig.gz). |
|
22 |
+# #' |
|
23 |
+# #' @author Aaron Taudt |
|
24 |
+# #' @param binned.data.list A \code{list()} of \code{\link{binned.data}} objects or vector of files that contain such objects. |
|
25 |
+# #' @param filename The name of the file that will be written. The ending ".wig.gz" for read counts will be appended. Any existing file will be overwritten. |
|
26 |
+# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}). |
|
27 |
+# #' @param separate.files A logical indicating whether or not to produce separate files for each object in \code{binned.data.list}. |
|
28 |
+# #' @param trackname Name that will be used in the "track name" field of the file. |
|
29 |
+# #' @return \code{NULL} |
|
30 |
+# #' @seealso \code{\link{exportUnivariates}}, \code{\link{exportMultivariate}} |
|
31 |
+# #' @importFrom utils write.table |
|
32 |
+# #' @importFrom grDevices col2rgb |
|
33 |
+# #' @examples |
|
34 |
+# #'## Get an example BAM file |
|
35 |
+# #'file <- system.file("extdata", "euratrans", |
|
36 |
+# #' "lv-H3K27me3-BN-male-bio2-tech1.bam", |
|
37 |
+# #' package="chromstaRData") |
|
38 |
+# #'## Bin the file into bin size 1000bp |
|
39 |
+# #'data(rn4_chrominfo) |
|
40 |
+# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000, |
|
41 |
+# #' chromosomes='chr12') |
|
42 |
+# #'## Export the binned read counts |
|
43 |
+# #'exportBinnedData(list(binned), filename=tempfile()) |
|
44 |
+# #' |
|
45 |
+exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.files=TRUE, trackname=NULL) { |
|
46 | 46 |
|
47 | 47 |
## Load data |
48 | 48 |
binned.data.list <- loadHmmsFromFiles(binned.data.list) |
... | ... |
@@ -78,12 +78,18 @@ exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.f |
78 | 78 |
binsize <- width(b[1]) |
79 | 79 |
name <- names(binned.data.list)[imod] |
80 | 80 |
if (header) { |
81 |
- cat(paste0('track type=wiggle_0 name="read count for ',name,'" description="read count for ',name,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE) |
|
81 |
+ if (is.null(trackname)) { |
|
82 |
+ trackname.string <- paste0("read count for ", name) |
|
83 |
+ } else { |
|
84 |
+ trackname.string <- paste0("read count for ", name, ", ", trackname) |
|
85 |
+ } |
|
86 |
+ cat(paste0('track type=wiggle_0 name="',trackname.string,'" description="',trackname.string,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE) |
|
82 | 87 |
} |
83 | 88 |
# Write read data |
89 |
+ b$counts <- rpkm.vector(b$counts, binsize=mean(width(b))) # RPKM normalization |
|
84 | 90 |
for (chrom in unique(b$chromosome)) { |
85 | 91 |
cat(paste0("fixedStep chrom=",chrom," start=1 step=",binsize," span=",binsize,"\n"), file=filename.gz, append=TRUE) |
86 |
- utils::write.table(mcols(b[b$chromosome==chrom])$counts, file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t') |
|
92 |
+ utils::write.table(b[b$chromosome==chrom]$counts, file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t') |
|
87 | 93 |
} |
88 | 94 |
if (separate.files) { |
89 | 95 |
close(filename.gz) |
... | ... |
@@ -98,41 +104,41 @@ exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.f |
98 | 104 |
#===================================================== |
99 | 105 |
# Export univariate HMMs |
100 | 106 |
#===================================================== |
101 |
-#' Export genome browser viewable files |
|
102 |
-#' |
|
103 |
-#' Export univariate peak-calls and read counts as genome browser viewable file |
|
104 |
-#' |
|
105 |
-#' Export \code{\link{uniHMM}} objects as files which can be uploaded into a genome browser. Peak-calls are exported in BED format (.bed.gz) and read counts are exported in WIGGLE format (.wig.gz). |
|
106 |
-#' |
|
107 |
-#' @author Aaron Taudt |
|
108 |
-#' @param hmm.list A \code{list()} of \code{\link{uniHMM}} objects or vector of files that contain such objects. |
|
109 |
-#' @param filename The name of the file that will be written. The appropriate ending will be appended, either ".bed.gz" for peak-calls or ".wig.gz" for read counts. Any existing file will be overwritten. |
|
110 |
-#' @param what A character vector specifying what will be exported. Supported are \code{c('peaks', 'counts')}. |
|
111 |
-#' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}). |
|
112 |
-#' @param separate.files A logical indicating whether or not to produce separate files for each hmm in \code{hmm.list}. |
|
113 |
-#' @return \code{NULL} |
|
114 |
-#' @seealso \code{\link{exportBinnedData}}, \code{\link{exportMultivariate}} |
|
115 |
-#' @export |
|
116 |
-#' @examples |
|
117 |
-#'## Get an example BAM file |
|
118 |
-#'file <- system.file("extdata", "euratrans", |
|
119 |
-#' "lv-H3K27me3-BN-male-bio2-tech1.bam", |
|
120 |
-#' package="chromstaRData") |
|
121 |
-#'## Bin the file into bin size 1000bp |
|
122 |
-#'data(rn4_chrominfo) |
|
123 |
-#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000, |
|
124 |
-#' chromosomes='chr12') |
|
125 |
-#'## Fit the univariate Hidden Markov Model |
|
126 |
-#'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1) |
|
127 |
-#'## Export |
|
128 |
-#'exportUnivariates(list(hmm), filename=tempfile(), what=c('peaks','counts')) |
|
129 |
-#' |
|
130 |
-exportUnivariates <- function(hmm.list, filename, what=c('peaks', 'counts'), header=TRUE, separate.files=TRUE) { |
|
107 |
+# #' Export genome browser viewable files |
|
108 |
+# #' |
|
109 |
+# #' Export univariate peak-calls and read counts (RPKM) as genome browser viewable file |
|
110 |
+# #' |
|
111 |
+# #' Export \code{\link{uniHMM}} objects as files which can be uploaded into a genome browser. Peak-calls are exported in BED format (.bed.gz) and read counts are exported in WIGGLE format as RPKM (.wig.gz). |
|
112 |
+# #' |
|
113 |
+# #' @author Aaron Taudt |
|
114 |
+# #' @param hmm.list A \code{list()} of \code{\link{uniHMM}} objects or vector of files that contain such objects. |
|
115 |
+# #' @param filename The name of the file that will be written. The appropriate ending will be appended, either ".bed.gz" for peak-calls or ".wig.gz" for read counts. Any existing file will be overwritten. |
|
116 |
+# #' @param what A character vector specifying what will be exported. Supported are \code{c('peaks', 'counts')}. |
|
117 |
+# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}). |
|
118 |
+# #' @param separate.files A logical indicating whether or not to produce separate files for each hmm in \code{hmm.list}. |
|
119 |
+# #' @param trackname Name that will be used in the "track name" field of the file. |
|
120 |
+# #' @return \code{NULL} |
|
121 |
+# #' @seealso \code{\link{exportBinnedData}}, \code{\link{exportMultivariate}} |
|
122 |
+# #' @examples |
|
123 |
+# #'## Get an example BAM file |
|
124 |
+# #'file <- system.file("extdata", "euratrans", |
|
125 |
+# #' "lv-H3K27me3-BN-male-bio2-tech1.bam", |
|
126 |
+# #' package="chromstaRData") |
|
127 |
+# #'## Bin the file into bin size 1000bp |
|
128 |
+# #'data(rn4_chrominfo) |
|
129 |
+# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000, |
|
130 |
+# #' chromosomes='chr12') |
|
131 |
+# #'## Fit the univariate Hidden Markov Model |
|
132 |
+# #'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1) |
|
133 |
+# #'## Export |
|
134 |
+# #'exportUnivariates(list(hmm), filename=tempfile(), what=c('peaks','counts')) |
|
135 |
+# #' |
|
136 |
+exportUnivariates <- function(hmm.list, filename, what=c('peaks', 'counts'), header=TRUE, separate.files=TRUE, trackname=NULL) { |
|
131 | 137 |
if ('peaks' %in% what) { |
132 |
- exportUnivariatePeaks(hmm.list, filename=paste0(filename, '_peaks'), header=header, separate.files=separate.files) |
|
138 |
+ exportUnivariatePeaks(hmm.list, filename=paste0(filename, '_peaks'), header=header, separate.files=separate.files, trackname=trackname) |
|
133 | 139 |
} |
134 | 140 |
if ('counts' %in% what) { |
135 |
- exportUnivariateCounts(hmm.list, filename=paste0(filename, '_counts'), header=header, separate.files=separate.files) |
|
141 |
+ exportUnivariateCounts(hmm.list, filename=paste0(filename, '_counts'), header=header, separate.files=separate.files, trackname=trackname) |
|
136 | 142 |
} |
137 | 143 |
} |
138 | 144 |
|
... | ... |
@@ -141,14 +147,14 @@ exportUnivariates <- function(hmm.list, filename, what=c('peaks', 'counts'), hea |
141 | 147 |
#---------------------------------------------------- |
142 | 148 |
#' @importFrom utils write.table |
143 | 149 |
#' @importFrom grDevices col2rgb |
144 |
-exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.files=TRUE) { |
|
150 |
+exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.files=TRUE, trackname=NULL) { |
|
145 | 151 |
|
146 | 152 |
## Load models |
147 | 153 |
hmm.list <- loadHmmsFromFiles(hmm.list, check.class=class.univariate.hmm) |
148 | 154 |
|
149 | 155 |
## Transform to GRanges |
150 |
- hmm.grl <- lapply(hmm.list, '[[', 'segments') |
|
151 |
- hmm.grl <- lapply(hmm.grl, insertchr) |
|
156 |
+ peaklist <- lapply(hmm.list, '[[', 'peaks') |
|
157 |
+ peaklist <- lapply(peaklist, insertchr) |
|
152 | 158 |
|
153 | 159 |
# Variables |
154 | 160 |
nummod <- length(hmm.list) |
... | ... |
@@ -157,16 +163,6 @@ exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.file |
157 | 163 |
filename.gz <- gzfile(filename, 'w') |
158 | 164 |
} |
159 | 165 |
|
160 |
- # Generate the colors |
|
161 |
- colors <- getStateColors(levels(hmm.grl[[1]]$state)) |
|
162 |
- RGBs <- t(grDevices::col2rgb(colors)) |
|
163 |
- RGBlist <- list() |
|
164 |
- for (i1 in 1:3) { |
|
165 |
- RGBlist[[i1]] <- RGBs[,i1] |
|
166 |
- } |
|
167 |
- RGBlist$sep <- ',' |
|
168 |
- RGBs <- do.call(paste, RGBlist) |
|
169 |
- |
|
170 | 166 |
# Write first line to file |
171 | 167 |
if (!separate.files) { |
172 | 168 |
ptm <- startTimedMessage('Writing to file ',filename, ' ...') |
... | ... |
@@ -187,24 +183,33 @@ exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.file |
187 | 183 |
ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...') |
188 | 184 |
cat("", file=filename.gz) |
189 | 185 |
} |
190 |
- hmm.gr <- hmm.grl[[imod]] |
|
186 |
+ peaks <- peaklist[[imod]] |
|
191 | 187 |
priority <- 51 + 4*imod |
192 | 188 |
if (header) { |
193 |
- cat(paste0("track name=\"univariate calls for ",ID,"\" description=\"univariate calls for ",ID,"\" visibility=1 itemRgb=On priority=",priority,"\n"), file=filename.gz, append=TRUE) |
|
189 |
+ if (is.null(trackname)) { |
|
190 |
+ trackname.string <- paste0("univariate peak calls for ", ID) |
|
191 |
+ } else { |
|
192 |
+ trackname.string <- paste0("univariate peak calls for ", ID, ", ", trackname) |
|
193 |
+ } |
|
194 |
+ cat(paste0("track name=\"",trackname.string,"\" description=\"",trackname.string,"\" visibility=1 itemRgb=On priority=",priority,"\n"), file=filename.gz, append=TRUE) |
|
194 | 195 |
} |
195 |
- if (is.null(hmm.gr$score)) { |
|
196 |
- hmm.gr$score <- 0 |
|
196 |
+ if (is.null(peaks$peakScores)) { |
|
197 |
+ peaks$peakScores <- 0 |
|
197 | 198 |
} |
198 |
- collapsed.calls <- as.data.frame(hmm.gr)[c('chromosome','start','end','state','score')] |
|
199 |
+ df <- as.data.frame(peaks) |
|
200 |
+ df$peakNumber <- paste0('peak_', 1:nrow(df)) |
|
201 |
+ df$strand <- sub('\\*', '.', df$strand) |
|
202 |
+ df <- df[,c('chromosome','start','end','peakNumber','peakScores','strand')] |
|
199 | 203 |
# Make score integer |
200 |
- collapsed.calls$score <- round(collapsed.calls$score*1000) |
|
201 |
- itemRgb <- RGBs[as.character(collapsed.calls$state)] |
|
202 |
- numsegments <- nrow(collapsed.calls) |
|
203 |
- df <- cbind(collapsed.calls, strand=rep(".",numsegments), thickStart=collapsed.calls$start, thickEnd=collapsed.calls$end, itemRgb=itemRgb) |
|
204 |
+ df$peakScores <- round(df$peakScores*1000) |
|
205 |
+ numsegments <- nrow(df) |
|
206 |
+ df <- cbind(df, thickStart=df$start, thickEnd=df$end) |
|
204 | 207 |
# Convert from 1-based closed to 0-based half open |
205 | 208 |
df$start <- df$start - 1 |
206 | 209 |
df$thickStart <- df$thickStart - 1 |
207 |
- df <- df[df$state=='modified',] |
|
210 |
+ # Colors |
|
211 |
+ RGB <- t(grDevices::col2rgb(getStateColors('modified'))) |
|
212 |
+ df$itemRgb <- apply(RGB,1,paste,collapse=",") |
|
208 | 213 |
if (nrow(df) == 0) { |