Browse code

bugfix in fold enrichment

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/chromstaR@122816 bc3139a8-67e5-0310-9ffc-ced21a209358

Aaron Taudt authored on 19/10/2016 13:12:20
Showing 47 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: chromstaR
2 2
 Type: Package
3 3
 Title: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data
4
-Version: 1.1.0
5
-Date: 2014-05-03
4
+Version: 1.1.1
5
+Date: 2016-10-19
6 6
 Author: Aaron Taudt, Maria Colome Tatche, Matthias Heinig, Minh Anh Nguyen
7 7
 Maintainer: Aaron Taudt <aaron.taudt@gmail.com>
8 8
 Description: This package implements functions for combinatorial and differential analysis of ChIP-seq data. It includes uni- and multivariate peak-calling, export to genome browser viewable files, and functions for enrichment analyses.
... ...
@@ -6,17 +6,17 @@ export(binReads)
6 6
 export(callPeaksMultivariate)
7 7
 export(callPeaksReplicates)
8 8
 export(callPeaksUnivariate)
9
-export(changePostCutoff)
9
+export(changeFDR)
10 10
 export(collapseBins)
11 11
 export(combineMultivariates)
12 12
 export(dec2bin)
13
-export(exportBinnedData)
14
-export(exportCombinedMultivariate)
13
+export(exportCombinations)
14
+export(exportCounts)
15 15
 export(exportGRangesAsBedFile)
16
-export(exportMultivariate)
17
-export(exportUnivariates)
16
+export(exportPeaks)
18 17
 export(fixedWidthBins)
19 18
 export(genomicFrequencies)
19
+export(getCombinations)
20 20
 export(getDistinctColors)
21 21
 export(getStateColors)
22 22
 export(heatmapCombinations)
... ...
@@ -24,9 +24,9 @@ export(heatmapCountCorrelation)
24 24
 export(heatmapTransitionProbs)
25 25
 export(loadHmmsFromFiles)
26 26
 export(plotEnrichCountHeatmap)
27
+export(plotEnrichment)
27 28
 export(plotExpression)
28 29
 export(plotFoldEnrichHeatmap)
29
-export(plotFoldEnrichment)
30 30
 export(plotHistogram)
31 31
 export(readBamFileAsGRanges)
32 32
 export(readBedFileAsGRanges)
33 33
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+CHANGES IN VERSION 1.1.1
2
+------------------------
3
+
4
+NEW FEATURES
5
+
6
+    o Selection of peaks can be done with 'changeFDR'.
7
+
8
+    o Peak calls are available in each chromstaR-object as list entry '$peaks'.
9
+
10
+SIGNIFICANT USER-LEVEL CHANGES
11
+
12
+    o 'plotFoldEnrichment' renamed to 'plotEnrichment'.
13
+
14
+    o 'exportBinnedData', 'exportUnivariates', 'exportMultivariates', 'exportCombinedMultivariates' replaced by 'exportCounts', 'exportPeaks', 'exportCombinations'.
15
+
16
+BUG FIXES
17
+
18
+    o Proper computation of fold enrichments, with < 1 indicating depletion and > 1 indicating enrichment.
19
+
20
+DEPRECATED AND DEFUNCT
21
+
22
+    o 'changePostCutoff'.
23
+
24
+    o 'plotFoldEnrichment'.
25
+
26
+    o 'exportBinnedData', 'exportUnivariates', 'exportMultivariates', 'exportCombinedMultivariates'.
... ...
@@ -11,8 +11,7 @@
11 11
 #' @param per.chrom If \code{per.chrom=TRUE} chromosomes will be treated separately. This tremendously speeds up the calculation but results might be noisier as compared to \code{per.chrom=FALSE}, where all chromosomes are concatenated for the HMM.
12 12
 #' @param chromosomes A vector specifying the chromosomes to use from the models in \code{hmms}. The default (\code{NULL}) uses all available chromosomes.
13 13
 #' @param eps Convergence threshold for the Baum-Welch algorithm.
14
-#' @param post.cutoff False discovery rate. The default \code{NULL} means that the state with maximum posterior probability will be chosen, irrespective of its absolute probability.
15
-#' @param keep.posteriors If set to \code{TRUE}, posteriors will be available in the output. This is useful to change the post.cutoff later, but increases the necessary disk space to store the result immense.
14
+#' @param keep.posteriors If set to \code{TRUE}, posteriors will be available in the output. This can be useful to change the posterior cutoff later, but increases the necessary disk space to store the result immensely.
16 15
 #' @param num.threads Number of threads to use. Setting this to >1 may give increased performance.
17 16
 #' @param max.time The maximum running time in seconds for the Baum-Welch algorithm. If this time is reached, the Baum-Welch will terminate after the current iteration finishes. The default \code{NULL} is no limit.
18 17
 #' @param max.iter The maximum number of iterations for the Baum-Welch algorithm. The default \code{NULL} is no limit.
... ...
@@ -50,7 +49,7 @@
50 49
 #'heatmapTransitionProbs(multimodel)
51 50
 #'heatmapCountCorrelation(multimodel)
52 51
 #'
53
-callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=TRUE, chromosomes=NULL, eps=0.01, post.cutoff=NULL, keep.posteriors=TRUE, num.threads=1, max.time=NULL, max.iter=NULL, keep.densities=FALSE, verbosity=1) {
52
+callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=TRUE, chromosomes=NULL, eps=0.01, keep.posteriors=FALSE, num.threads=1, max.time=NULL, max.iter=NULL, keep.densities=FALSE, verbosity=1) {
54 53
 
55 54
     ## Intercept user input
56 55
     if (!is.null(use.states)) {
... ...
@@ -71,9 +70,6 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
71 70
     if (check.logical(keep.posteriors)!=0) stop("argument 'keep.posteriors' expects a logical (TRUE or FALSE)")
72 71
     if (check.logical(keep.densities)!=0) stop("argument 'keep.densities' expects a logical (TRUE or FALSE)")
73 72
     if (check.integer(verbosity)!=0) stop("argument 'verbosity' expects an integer")
74
-    if (!is.null(post.cutoff)) {
75
-        if (post.cutoff>1 | post.cutoff<0) stop("argument 'post.cutoff' has to be between 0 and 1 if specified")
76
-    }
77 73
     if (length(hmms)==0) {
78 74
         stop("argument 'hmms' is of length=0. Cannot call multivariate peaks with no models.")
79 75
     }
... ...
@@ -90,9 +86,6 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
90 86
             result$bins$state <- factor(c(0,0,1))[hmm$bins$state]
91 87
             result$bins$posteriors <- matrix(hmm$bins$posterior.modified, ncol=1, dimnames=list(NULL, result$info$ID))
92 88
             result$bins$differential.score <- 0
93
-            if (!is.null(post.cutoff)) {
94
-                result$bins$state <- factor(as.integer(result$bins$posteriors >= post.cutoff))
95
-            }
96 89
         ## Add combinations
97 90
             mapping <- NULL
98 91
             if (!is.null(use.states)) {
... ...
@@ -125,8 +118,6 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
125 118
             # Distributions
126 119
             result$distributions <- list(hmm$distributions)
127 120
             names(result$distributions) <- result$info$ID
128
-            # post.cutoff
129
-            result$post.cutoff <- post.cutoff
130 121
         ## Convergence info
131 122
             convergenceInfo <- list(eps=NA, loglik=NA, loglik.delta=NA, num.iterations=NA, time.sec=NA)
132 123
             result$convergenceInfo <- convergenceInfo
... ...
@@ -167,7 +158,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
167 158
             ptm <- startTimedMessage("Running multivariate ...")
168 159
             models <- foreach (chrom = chromosomes, .packages='chromstaR') %dopar% {
169 160
                 bins <- p$bins[seqnames(p$bins)==chrom]
170
-                model <- runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, post.cutoff=post.cutoff, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity)
161
+                model <- runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity)
171 162
                 model
172 163
             }
173 164
             stopTimedMessage(ptm)
... ...
@@ -176,7 +167,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
176 167
             for (chrom in chromosomes) {
177 168
                 ptm <- startTimedMessage("Chromosome = ", chrom, "\n")
178 169
                 bins <- p$bins[seqnames(p$bins)==chrom]
179
-                model <- suppressMessages( runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, post.cutoff=post.cutoff, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) )
170
+                model <- suppressMessages( runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) )
180 171
                 message("Time spent for chromosome = ", chrom, ":", appendLF=FALSE)
181 172
                 stopTimedMessage(ptm)
182 173
                 models[[chrom]] <- model
... ...
@@ -191,7 +182,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
191 182
     ## Run multivariate for all chromosomes
192 183
     } else {
193 184
 
194
-        model <- runMultivariate(bins=p$bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=num.threads, post.cutoff=post.cutoff, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity)
185
+        model <- runMultivariate(bins=p$bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=num.threads, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity)
195 186
 
196 187
     }
197 188
 
... ...
@@ -200,7 +191,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
200 191
 }
201 192
 
202 193
 
203
-runMultivariate <- function(bins, info, comb.states, use.states, distributions, weights, correlationMatrix, correlationMatrixInverse, determinant, max.iter, max.time, eps, num.threads, post.cutoff, keep.posteriors, keep.densities, transitionProbs.initial=NULL, startProbs.initial=NULL, verbosity=1) {
194
+runMultivariate <- function(bins, info, comb.states, use.states, distributions, weights, correlationMatrix, correlationMatrixInverse, determinant, max.iter, max.time, eps, num.threads, keep.posteriors, keep.densities, transitionProbs.initial=NULL, startProbs.initial=NULL, verbosity=1) {
204 195
 
205 196
     ptm <- startTimedMessage("Starting multivariate HMM with ", length(comb.states), " combinatorial states")
206 197
     message("")
... ...
@@ -273,6 +264,12 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions,
273 264
         result$info <- info
274 265
     ## Bin coordinates, posteriors and states
275 266
         result$bins <- bins
267
+        if (!is.null(use.states$state)) {
268
+            state.levels <- levels(use.states$state)
269
+        } else {
270
+            state.levels <- hmm$comb.states
271
+        }
272
+        result$bins$state <- factor(hmm$states, levels=state.levels)
276 273
         if (get.posteriors) {
277 274
             ptm <- startTimedMessage("Transforming posteriors to `per sample` representation ...")
278 275
             hmm$posteriors <- matrix(hmm$posteriors, ncol=hmm$max.states)
... ...
@@ -281,20 +278,11 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions,
281 278
             post.per.track <- hmm$posteriors %*% binstates
282 279
             colnames(post.per.track) <- result$info$ID
283 280
             result$bins$posteriors <- post.per.track
284
-            result$bins$differential.score <- differentialScoreSum(result$bins$posteriors, result$info)
281
+            result$bins$peakScores <- getPeakScores(result$bins)
282
+            result$bins$differential.score <- differentialScoreSum(result$bins$peakScores, result$info)
285 283
             stopTimedMessage(ptm)
286 284
         }
287 285
         ptm <- startTimedMessage("Calculating states from posteriors ...")
288
-        if (!is.null(use.states$state)) {
289
-            state.levels <- levels(use.states$state)
290
-        } else {
291
-            state.levels <- hmm$comb.states
292
-        }
293
-        if (!is.null(post.cutoff)) {
294
-            result$bins$state <- factor(bin2dec(result$bins$posteriors >= post.cutoff), levels=state.levels)
295
-        } else {
296
-            result$bins$state <- factor(hmm$states, levels=state.levels)
297
-        }
298 286
         stopTimedMessage(ptm)
299 287
         if (keep.densities) {
300 288
             result$bins$densities <- matrix(hmm$densities, ncol=hmm$max.states)
... ...
@@ -314,6 +302,16 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions,
314 302
         if (!keep.posteriors) {
315 303
             result$bins$posteriors <- NULL
316 304
         }
305
+    ## Peaks
306
+        result$peaks <- list()
307
+        for (i1 in 1:ncol(result$segments$peakScores)) {
308
+            mask <- result$segments$peakScores[,i1] > 0
309
+            peaks <- result$segments[mask]
310
+            mcols(peaks) <- NULL
311
+            peaks$peakScores <- result$segments$peakScores[mask,i1]
312
+            result$peaks[[i1]] <- peaks
313
+        }
314
+        names(result$peaks) <- colnames(result$segments$peakScores)
317 315
     ## Parameters
318 316
         result$mapping <- mapping
319 317
         combinations <- mapping[as.character(comb.states)]
... ...
@@ -337,8 +335,6 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions,
337 335
         # Distributions
338 336
         result$distributions <- distributions
339 337
         names(result$distributions) <- result$info$ID
340
-        # post.cutoff
341
-        result$post.cutoff <- post.cutoff
342 338
     ## Convergence info
343 339
         convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec)
344 340
         result$convergenceInfo <- convergenceInfo
... ...
@@ -420,7 +420,6 @@ callPeaksUnivariateAllChr <- function(binned.data, input.data=NULL, eps=0.01, in
420 420
                                                         ranges=ranges(binned.data),
421 421
                                                         counts=hmm$counts,
422 422
                                                         state=states) 
423
-        result$bins$score <- apply(hmm$posteriors, 1, max)
424 423
         result$bins$posteriors <- hmm$posteriors
425 424
         if (!control) {
426 425
             result$bins$posterior.modified <- hmm$posteriors[,'modified']
... ...
@@ -430,17 +429,22 @@ callPeaksUnivariateAllChr <- function(binned.data, input.data=NULL, eps=0.01, in
430 429
         }
431 430
         seqlengths(result$bins) <- seqlengths(binned.data)
432 431
         stopTimedMessage(ptm)
432
+    ## Peak score as maximum posterior in that peak
433
+        result$bins$peakScores <- getPeakScore.univariate(result$bins)
433 434
     ## Segmentation
434 435
         ptm <- startTimedMessage("Making segmentation ...")
435 436
         df <- as.data.frame(result$bins)
436
-        red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2getMax=c('score'), columns2drop=c('width',grep('posterior', names(df), value=TRUE), 'counts')))
437
-        red.gr <- GRanges(seqnames=red.df[,1], ranges=IRanges(start=red.df[,2], end=red.df[,3]), strand=red.df[,4], state=red.df[,'state'], score=red.df[,'max.score'])
438
-        result$segments <- red.gr
437
+        red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2drop=c('width',grep('posterior', names(df), value=TRUE), 'counts')))
438
+        result$segments <- methods::as(red.df, 'GRanges')
439 439
         seqlengths(result$segments) <- seqlengths(binned.data)[seqlevels(result$segments)]
440 440
         if (!keep.posteriors) {
441 441
             result$bins$posteriors <- NULL
442 442
         }
443 443
         stopTimedMessage(ptm)
444
+    ## Peaks
445
+        result$peaks <- result$segments[result$segments$state == 'modified']
446
+        result$peaks$state <- NULL
447
+        result$segments <- NULL
444 448
     ## Parameters
445 449
         # Weights
446 450
         result$weights <- hmm$weights
447 451
new file mode 100644
... ...
@@ -0,0 +1,199 @@
1
+#' Change the false discovery rate of a Hidden Markov Model
2
+#'
3
+#' Adjusts the peak calls of a \code{\link{uniHMM}} or \code{\link{multiHMM}} object with the given false discovery rate.
4
+#'
5
+#' Each peak has a peak score associated (between 0 and 1). The false discovery rate is a simple cutoff on 1-peakScore, e.g. for an \code{fdr = 0.01} only peaks with a peak score \code{>= 0.99} (1-fdr) will be selected.
6
+#' 
7
+#' @author Aaron Taudt
8
+#' @param model A \code{\link{uniHMM}} or \code{\link{multiHMM}} object with posteriors.
9
+#' @param fdr A vector of values between 0 and 1 for each column in \code{model$bins$peakScores}. If only one value is given, it will be reused for all columns. Values close to 0 will yield more stringent peak calls with lower false positive but higher false negative rate.
10
+#' @param invert Select peaks below (\code{FDR}) or above (\code{TRUE}) the given \code{fdr}. This is useful to select low confidence peaks.
11
+#' @return The input object is returned with adjusted peak calls.
12
+#' @export
13
+#' @examples
14
+#'## Get an example BAM file
15
+#'file <- system.file("extdata", "euratrans",
16
+#'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
17
+#'                        package="chromstaRData")
18
+#'## Bin the file into bin size 1000bp
19
+#'data(rn4_chrominfo)
20
+#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
21
+#'                   chromosomes='chr12')
22
+#'## Fit the univariate Hidden Markov Model
23
+#'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1)
24
+#'## Compare fits with different fdrs
25
+#'plotHistogram(hmm) + ylim(0,0.25) + ylim(0,0.3)
26
+#'plotHistogram(changeFDR(hmm, fdr=0.01)) + ylim(0,0.3)
27
+#'plotHistogram(changeFDR(hmm, fdr=1e-12)) + ylim(0,0.3)
28
+#'
29
+changeFDR <- function(model, fdr=0.01, invert=FALSE) {
30
+
31
+    if (!is.numeric(fdr)) {
32
+        warning("'fdr' is not numeric. Nothing done.")
33
+        return(model)
34
+    }
35
+    if (is(model, class.univariate.hmm)) {
36
+        model.new <- changeFDR.univariate(model=model, fdr=fdr, invert=invert)
37
+    } else if (is(model, class.multivariate.hmm) | is(model, class.combined.multivariate.hmm)) {
38
+        model.new <- changeFDR.multivariate(model=model, fdr=fdr, invert=invert)
39
+    }
40
+    return(model.new)
41
+    
42
+}
43
+
44
+
45
+changeFDR.multivariate <- function(model, fdr, invert=FALSE) {
46
+  
47
+    ## Make fdr vector
48
+    if (is.null(model$bins$peakScores)) stop("Cannot recalculate states because peakScores are missing.")
49
+    numcol <- ncol(model$bins$peakScores)
50
+    if (length(fdr) == 1) {
51
+        fdr <- rep(fdr, numcol)
52
+    }
53
+    if (length(fdr) != numcol) {
54
+        stop("Need ", numcol, " values in 'fdr' but only ", length(fdr), " are provided.")
55
+    }
56
+    
57
+    for (fdr.i in fdr) {
58
+        if (fdr.i < 0 | fdr.i > 1) {
59
+            stop("Values for 'fdr' need to be inside the interval [0,1].")
60
+        }
61
+    }
62
+    names(fdr) <- colnames(model$bins$peakScores)
63
+
64
+    # Check if replicates have the same fdr value
65
+    reps <- sub('-rep.*', '', model$info$ID)
66
+    if (any(sapply(split(fdr, reps), function(x) { Reduce('&', x==x[1]) }) == FALSE)) {
67
+        stop("Replicates must have the same fdr value.")
68
+    }
69
+    
70
+    ## fdr threshold
71
+    threshold <- 1-fdr
72
+
73
+    ### Multivariate HMM ###
74
+    if (is(model, class.multivariate.hmm)) {
75
+        ## Calculate states
76
+        ptm <- startTimedMessage("Calculating states from peakScores ...")
77
+        p <- model$bins$peakScores
78
+        p.thresholded <- matrix(FALSE, ncol=ncol(p), nrow=nrow(p))
79
+        for (icol in 1:ncol(p)) {
80
+            if (!invert) {
81
+                p.thresholded[,icol] <- p[,icol] >= threshold[icol]
82
+            } else {
83
+                p.thresholded[,icol] <- p[,icol] < threshold[icol] & p[,icol] > 0
84
+            }
85
+        }
86
+        states <- factor(bin2dec(p.thresholded), levels=levels(model$bins$state))
87
+        model$bins$state <- states
88
+        ## Combinations
89
+        if (!is.null(model$bins$combination)) {
90
+            mapping <- model$mapping
91
+            model$bins$combination <- factor(mapping[as.character(model$bins$state)], levels=mapping[as.character(levels(model$bins$state))])
92
+        }
93
+        stopTimedMessage(ptm)
94
+        ## Redo segmentation
95
+        model$segments <- multivariateSegmentation(model$bins, column2collapseBy='state')
96
+        ## Redo peaks
97
+        model$peaks <- list()
98
+        for (i1 in 1:ncol(model$segments$peakScores)) {
99
+            mask <- model$segments$peakScores[,i1] > 0
100
+            peaks <- model$segments[mask]
101
+            mcols(peaks) <- NULL
102
+            peaks$peakScores <- model$segments$peakScores[mask,i1]
103
+            model$peaks[[i1]] <- peaks
104
+        }
105
+        names(model$peaks) <- colnames(model$segments$peakScores)
106
+        
107
+    } else if (is(model, class.combined.multivariate.hmm)) {
108
+        mapping.df <- stateBrewer(model$info[,setdiff(names(model$info), 'ID')], mode='full')
109
+        mapping <- mapping.df$combination
110
+        names(mapping) <- mapping.df$state
111
+        p <- model$bins$peakScores
112
+        p.thresholded <- matrix(FALSE, ncol=ncol(p), nrow=nrow(p))
113
+        for (icol in 1:ncol(p)) {
114
+            if (!invert) {
115
+                p.thresholded[,icol] <- p[,icol] >= threshold[icol]
116
+            } else {
117
+                p.thresholded[,icol] <- p[,icol] < threshold[icol] & p[,icol] > 0
118
+            }
119
+        }
120
+        states <- factor(bin2dec(p.thresholded), levels=mapping.df$state)
121
+        ## Make fake multiHMM
122
+        multiHMM <- list()
123
+        class(multiHMM) <- class.multivariate.hmm
124
+        multiHMM$info <- model$info
125
+        multiHMM$bins <- model$bins
126
+        multiHMM$bins$combination <- mapping[as.character(states)]
127
+        multiHMM$bins$state <- factor(states)
128
+        multiHMM$bins$posteriors <- model$bins$posteriors
129
+        multiHMM$bins$peakScores <- p
130
+        multiHMM$mapping <- mapping
131
+        multiHMM$peaks <- model$peaks
132
+        model <- combineMultivariates(list(multiHMM), mode='full')
133
+        ## Apply FDR on peaks ##
134
+        for (i1 in 1:length(model$peaks)) {
135
+            peaks <- model$peaks[[i1]]
136
+            if (!invert) {
137
+                model$peaks[[i1]] <- peaks[peaks$peakScores >= threshold[names(model$peaks)[i1]]]
138
+            } else {
139
+                model$peaks[[i1]] <- peaks[peaks$peakScores < threshold[names(model$peaks)[i1]]]
140
+            }
141
+        }
142
+    } else {
143
+        stop("Supply either a uniHMM, multiHMM or combinedMultiHMM object.")
144
+    }
145
+    ## Return model
146
+    model$fdr <- fdr
147
+    return(model)
148
+    
149
+
150
+    
151
+}
152
+
153
+
154
+changeFDR.univariate <- function(model, fdr, invert=FALSE) {
155
+  
156
+    fdr <- suppressWarnings( as.numeric(fdr) )
157
+    if (!is.numeric(fdr) | is.na(fdr)) {
158
+        warning("Not changing false discovery rate because given 'fdr' is not numeric.")
159
+        return(model)
160
+    } else if (fdr < 0 | fdr > 1) {
161
+        warning("fdr has to be inside the interval [0,1]. Nothing done.")
162
+        return(model)
163
+    }
164
+
165
+    ## fdr threshold
166
+    threshold <- 1-fdr
167
+
168
+    if (is.null(model$bins$peakScores)) stop("Cannot recalculate states because column 'peakScores' is missing.")
169
+    ## Calculate states
170
+    ptm <- startTimedMessage("Calculating states from peakScores ...")
171
+    states <- model$bins$state
172
+    states[model$bins$state == 'modified'] <- 'unmodified'
173
+    if (!invert) {
174
+        states[ model$bins$peakScores >= threshold ] <- 'modified'
175
+    } else {
176
+        states[ model$bins$peakScores < threshold & model$bins$peakScores > 0 ] <- 'modified'
177
+    }
178
+    model$bins$state <- states
179
+    stopTimedMessage(ptm)
180
+    ## Redo segmentation
181
+    ptm <- startTimedMessage("Making segmentation ...")
182
+    gr <- model$bins
183
+    df <- as.data.frame(gr)
184
+    red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2average=c('score'), columns2drop=c('width',grep('posteriors', names(df), value=TRUE), 'counts')))
185
+    model$segments <- methods::as(red.df, 'GRanges')
186
+    seqlengths(model$segments) <- seqlengths(model$bins)[seqlevels(model$segments)]
187
+    ## Redo peaks
188
+    model$peaks <- model$segments[model$segments$state == 'modified']
189
+    model$peaks$state <- NULL
190
+    model$segments <- NULL
191
+    ## Redo weights
192
+    model$weights <- table(model$bins$state) / length(model$bins)
193
+    stopTimedMessage(ptm)
194
+
195
+    ## Return model
196
+    model$fdr <- fdr
197
+    return(model)
198
+  
199
+}
... ...
@@ -1,4 +1,4 @@
1
-#' Change the false discovery rate of a Hidden Markov Model
1
+#' Change the posterior cutoff of a Hidden Markov Model
2 2
 #'
3 3
 #' Adjusts the peak calls of a \code{\link{uniHMM}} or \code{\link{multiHMM}} object with the given posterior cutoff.
4 4
 #'
... ...
@@ -8,25 +8,24 @@
8 8
 #' @param model A \code{\link{uniHMM}} or \code{\link{multiHMM}} object with posteriors.
9 9
 #' @param post.cutoff A vector of posterior cutoff values between 0 and 1 the same length as \code{ncol(model$bins$posteriors)}. If only one value is given, it will be reused for all columns. Values close to 1 will yield more stringent peak calls with lower false positive but higher false negative rate.
10 10
 #' @return The input object is returned with adjusted peak calls.
11
-#' @export
12
-#' @examples
13
-#'## Get an example BAM file
14
-#'file <- system.file("extdata", "euratrans",
15
-#'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
16
-#'                        package="chromstaRData")
17
-#'## Bin the file into bin size 1000bp
18
-#'data(rn4_chrominfo)
19
-#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
20
-#'                   chromosomes='chr12')
21
-#'## Fit the univariate Hidden Markov Model
22
-#'# !Keep posteriors to change the post.cutoff later!
23
-#'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1,
24
-#'                           keep.posteriors=TRUE)
25
-#'## Compare fits with different post.cutoffs
26
-#'plotHistogram(changePostCutoff(hmm, post.cutoff=0.01)) + ylim(0,0.25)
27
-#'plotHistogram(hmm) + ylim(0,0.25)
28
-#'plotHistogram(changePostCutoff(hmm, post.cutoff=0.99)) + ylim(0,0.25)
29
-#'
11
+# #' @examples
12
+# #'## Get an example BAM file
13
+# #'file <- system.file("extdata", "euratrans",
14
+# #'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
15
+# #'                        package="chromstaRData")
16
+# #'## Bin the file into bin size 1000bp
17
+# #'data(rn4_chrominfo)
18
+# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
19
+# #'                   chromosomes='chr12')
20
+# #'## Fit the univariate Hidden Markov Model
21
+# #'# !Keep posteriors to change the post.cutoff later!
22
+# #'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1,
23
+# #'                           keep.posteriors=TRUE)
24
+# #'## Compare fits with different post.cutoffs
25
+# #'plotHistogram(changePostCutoff(hmm, post.cutoff=0.01)) + ylim(0,0.25)
26
+# #'plotHistogram(hmm) + ylim(0,0.25)
27
+# #'plotHistogram(changePostCutoff(hmm, post.cutoff=0.99)) + ylim(0,0.25)
28
+# #'
30 29
 changePostCutoff <- function(model, post.cutoff=0.5) {
31 30
 
32 31
     if (!is.numeric(post.cutoff)) {
... ...
@@ -80,7 +79,7 @@ changePostCutoff.multivariate <- function(model, post.cutoff) {
80 79
         for (icol in 1:ncol(post)) {
81 80
             post.thresholded[,icol] <- post[,icol] >= threshold[icol]
82 81
         }
83
-        states <- factor(bin2dec(model$bins$posteriors >= threshold), levels=levels(model$bins$state))
82
+        states <- factor(bin2dec(post.thresholded), levels=levels(model$bins$state))
84 83
         model$bins$state <- states
85 84
         ## Combinations
86 85
         if (!is.null(model$bins$combination)) {
... ...
@@ -90,6 +90,10 @@ combineMultivariates <- function(hmms, mode) {
90 90
         counts[[1]] <- hmm$bins$counts
91 91
         posteriors <- list()
92 92
         posteriors[[1]] <- hmm$bins$posteriors
93
+        peakScores <- list()
94
+        peakScores[[1]] <- hmm$bins$peakScores
95
+        peaks <- list()
96
+        peaks[[1]] <- hmm$peaks
93 97
         binstates <- list()
94 98
         binstates[[1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
95 99
         stopTimedMessage(ptm)
... ...
@@ -102,6 +106,8 @@ combineMultivariates <- function(hmms, mode) {
102 106
                 combs[[i1]] <- hmm$bins$combination
103 107
                 counts[[i1]] <- hmm$bins$counts
104 108
                 posteriors[[i1]] <- hmm$bins$posteriors
109
+                peakScores[[i1]] <- hmm$bins$peakScores
110
+                peaks[[i1]] <- hmm$peaks
105 111
                 binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
106 112
                 stopTimedMessage(ptm)
107 113
             }
... ...
@@ -109,12 +115,13 @@ combineMultivariates <- function(hmms, mode) {
109 115
         ptm <- startTimedMessage("Concatenating conditions ...")
110 116
         counts <- do.call(cbind, counts)
111 117
         posteriors <- do.call(cbind, posteriors)
118
+        peakScores <- do.call(cbind, peakScores)
112 119
         infos <- do.call(rbind, infos)
113 120
         conditions <- unique(infos$condition)
114 121
         states <- factor(bin2dec(do.call(cbind, binstates)))
115 122
         names(states) <- NULL
116 123
         names(combs) <- conditions
117
-        combs.df <- as(combs,'DataFrame')
124
+        combs.df <- methods::as(combs,'DataFrame')
118 125
         stopTimedMessage(ptm)
119 126
         
120 127
     } else if (mode == 'differential') {
... ...
@@ -122,6 +129,8 @@ combineMultivariates <- function(hmms, mode) {
122 129
         infos <- list()
123 130
         counts <- list()
124 131
         posteriors <- list()
132
+        peakScores <- list()
133
+        peaks <- list()
125 134
         binstates <- list()
126 135
         for (i1 in 1:length(hmms)) {
127 136
             ptm <- startTimedMessage("Processing HMM ",i1," ...")
... ...
@@ -129,12 +138,15 @@ combineMultivariates <- function(hmms, mode) {
129 138
             infos[[i1]] <- hmm$info
130 139
             counts[[i1]] <- hmm$bins$counts
131 140
             posteriors[[i1]] <- hmm$bins$posteriors
141
+            peakScores[[i1]] <- hmm$bins$peakScores
142
+            peaks[[i1]] <- hmm$peaks
132 143
             binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
133 144
             stopTimedMessage(ptm)
134 145
         }
135 146
         ptm <- startTimedMessage("Concatenating HMMs ...")
136 147
         counts <- do.call(cbind, counts)
137 148
         posteriors <- do.call(cbind, posteriors)
149
+        peakScores <- do.call(cbind, peakScores)
138 150
         infos <- do.call(rbind, infos)
139 151
         conditions <- unique(infos$condition)
140 152
         binstates <- do.call(cbind, binstates)
... ...
@@ -160,7 +172,7 @@ combineMultivariates <- function(hmms, mode) {
160 172
             combs[[condition]] <- mapping[as.character(states.cond)]
161 173
         }
162 174
         combs.df <- as.data.frame(combs) # get factors instead of characters
163
-        combs.df <- as(combs.df, 'DataFrame')
175
+        combs.df <- methods::as(combs.df, 'DataFrame')
164 176
         stopTimedMessage(ptm)
165 177
         
166 178
     } else if (mode == 'full') {
... ...
@@ -175,6 +187,8 @@ combineMultivariates <- function(hmms, mode) {
175 187
         conditions <- unique(infos$condition)
176 188
         counts <- hmm$bins$counts
177 189
         posteriors <- hmm$bins$posteriors
190
+        peakScores <- hmm$bins$peakScores
191
+        peaks <- hmm$peaks
178 192
         states <- hmm$bins$state
179 193
         combs <- list()
180 194
         for (condition in conditions) {
... ...
@@ -186,7 +200,7 @@ combineMultivariates <- function(hmms, mode) {
186 200
             stopTimedMessage(ptm)
187 201
         }
188 202
         combs.df <- as.data.frame(combs) # get factors instead of characters
189
-        combs.df <- as(combs.df, 'DataFrame')
203
+        combs.df <- methods::as(combs.df, 'DataFrame')
190 204
         
191 205
     } else if (mode == 'replicate') {
192 206
         ## Load first HMM for coordinates
... ...
@@ -201,6 +215,10 @@ combineMultivariates <- function(hmms, mode) {
201 215
         counts[[1]] <- hmm$bins$counts
202 216
         posteriors <- list()
203 217
         posteriors[[1]] <- hmm$bins$posteriors
218
+        peakScores <- list()
219
+        peakScores[[1]] <- hmm$bins$peakScores
220
+        peaks <- list()
221
+        peaks[[1]] <- hmm$peaks
204 222
         binstates <- list()
205 223
         binstates[[1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
206 224
         stopTimedMessage(ptm)
... ...
@@ -212,6 +230,8 @@ combineMultivariates <- function(hmms, mode) {
212 230
                 infos[[i1]] <- hmm$info
213 231
                 counts[[i1]] <- hmm$bins$counts
214 232
                 posteriors[[i1]] <- hmm$bins$posteriors
233
+                peakScores[[i1]] <- hmm$bins$peakScores
234
+                peaks[[i1]] <- hmm$peaks
215 235
                 binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
216 236
                 stopTimedMessage(ptm)
217 237
             }
... ...
@@ -219,6 +239,7 @@ combineMultivariates <- function(hmms, mode) {
219 239
         ptm <- startTimedMessage("Concatenating mark-conditions ...")
220 240
         counts <- do.call(cbind, counts)
221 241
         posteriors <- do.call(cbind, posteriors)
242
+        peakScores <- do.call(cbind, peakScores)
222 243
         binstates <- do.call(cbind, binstates)
223 244
         infos <- do.call(rbind, infos)
224 245
         conditions <- unique(infos$condition)
... ...
@@ -236,7 +257,7 @@ combineMultivariates <- function(hmms, mode) {
236 257
             combs[[condition]] <- mapping[as.character(states)]
237 258
         }
238 259
         names(combs) <- conditions
239
-        combs.df <- as(combs,'DataFrame')
260
+        combs.df <- methods::as(combs,'DataFrame')
240 261
         stopTimedMessage(ptm)
241 262
     } else {
242 263
         stop("Unknown mode '", mode, "'.")
... ...
@@ -261,9 +282,10 @@ combineMultivariates <- function(hmms, mode) {
261 282
     ## Transferring counts and posteriors
262 283
     bins$counts <- counts
263 284
     bins$posteriors <- posteriors
285
+    bins$peakScores <- peakScores
264 286
 
265 287
     ## Add differential score ##
266
-    bins$differential.score <- differentialScoreSum(bins$posteriors, infos)
288
+    bins$differential.score <- differentialScoreSum(bins$peakScores, infos)
267 289
 
268 290
     ### Redo the segmentation for all conditions combined
269 291
     ptm <- startTimedMessage("Redoing segmentation for all conditions combined ...")
... ...
@@ -273,20 +295,23 @@ combineMultivariates <- function(hmms, mode) {
273 295
     
274 296
     ### Redo the segmentation for each condition separately
275 297
     ptm <- startTimedMessage("Redoing segmentation for each condition separately ...")
276
-    segments.separate <- list()
298
+    segments.per.condition <- list()
277 299
     for (cond in names(combs)) {
278 300
         bins.cond <- bins
279 301
         mcols(bins.cond) <- mcols(bins)[paste0('combination.',cond)]
280 302
         df <- as.data.frame(bins.cond)
281 303
         names(df)[6] <- cond
282 304
         segments.cond <- suppressMessages( collapseBins(df, column2collapseBy=cond, columns2drop=c('width', grep('posteriors', names(df), value=TRUE))) )
283
-        segments.cond <- as(segments.cond, 'GRanges')
305
+        segments.cond <- methods::as(segments.cond, 'GRanges')
284 306
         names(mcols(segments.cond)) <- 'combination'
285 307
         seqlengths(segments.cond) <- seqlengths(bins)[seqlevels(segments.cond)]
286
-        segments.separate[[cond]] <- segments.cond
308
+        segments.per.condition[[cond]] <- segments.cond
287 309
     }
288 310
     stopTimedMessage(ptm)
289 311
     
312
+    ### Flatten peak list ###
313
+    peaks <- unlist(peaks)
314
+    
290 315
     ### Make return object
291 316
     hmm <- list()
292 317
     class(hmm) <- class.combined.multivariate.hmm
... ...
@@ -294,7 +319,8 @@ combineMultivariates <- function(hmms, mode) {
294 319
     rownames(hmm$info) <- NULL
295 320
     hmm$bins <- bins
296 321
     hmm$segments <- segments
297
-    hmm$segments.separate <- segments.separate
322
+    hmm$segments.per.condition <- segments.per.condition
323
+    hmm$peaks <- peaks
298 324
     hmm$frequencies <- freqs$table
299 325
     return(hmm)
300 326
     
... ...
@@ -41,6 +41,19 @@ NULL
41 41
 NULL
42 42
 
43 43
 
44
+#' Gene coordinates for rn4
45
+#'
46
+#' A data.frame containing gene coordinates and biotypes of the rn4 assembly.
47
+#'
48
+#' @docType data
49
+#' @name genes_rn4
50
+#' @format A data.frame.
51
+#' @examples
52
+#'data(genes_rn4)
53
+#'head(genes_rn4)
54
+NULL
55
+
56
+
44 57
 #' chromstaR objects
45 58
 #'
46 59
 #' @description
... ...
@@ -136,7 +149,7 @@ NULL
136 149
 #' A \code{list()} with the following entries:
137 150
 #' \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing genomic bin coordinates and human readable combinations for the combined \code{\link{multiHMM}} objects.}
138 151
 #' \item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.}
139
-#' \item{segments.separate}{A \code{list} with segments for each condition separately.}
152
+#' \item{segments.per.condition}{A \code{list} with segments for each condition separately.}
140 153
 #' @seealso \code{\link{combineMultivariates}}, \code{\link{uniHMM}}, \code{\link{multiHMM}}
141 154
 #' @name combinedMultiHMM
142 155
 #' @aliases combinedHMM
... ...
@@ -18,7 +18,7 @@
18 18
 #' \describe{
19 19
 #'   \item{\code{\link{heatmapCountCorrelation}}}{Heatmap of read count correlations.}
20 20
 #'   \item{\code{\link{plotEnrichCountHeatmap}}}{Heatmap of read counts around annotation.}
21
-#'   \item{\code{\link{plotFoldEnrichment}}}{Enrichment of combinatorial states around annotation.}
21
+#'   \item{\code{\link{plotEnrichment}}}{Enrichment of combinatorial states around annotation.}
22 22
 #'   \item{\code{\link{plotFoldEnrichHeatmap}}}{Enrichment of combinatorial states at multiple annotations.}
23 23
 #'   \item{\code{\link{plotExpression}}}{Boxplot of expression values that overlap combinatorial states.}
24 24
 #' }
... ...
@@ -32,17 +32,17 @@
32 32
 #'### Make the enrichment plots ###
33 33
 #'# We expect promoter [H3K4me3] and bivalent-promoter signatures [H3K4me3+H3K27me3]
34 34
 #'# to be enriched at transcription start sites.
35
-#'    plotFoldEnrichment(hmm = model, annotation = genes, bp.around.annotation = 15000) +
35
+#'    plotEnrichment(hmm = model, annotation = genes, bp.around.annotation = 15000) +
36 36
 #'    ggtitle('Fold enrichment around genes') +
37 37
 #'    xlab('distance from gene body')
38 38
 #'  
39 39
 #'# Plot enrichment only at TSS. We make use of the fact that TSS is the start of a gene.
40
-#'    plotFoldEnrichment(model, genes, region = 'start') +
40
+#'    plotEnrichment(model, genes, region = 'start') +
41 41
 #'    ggtitle('Fold enrichment around TSS') +
42 42
 #'    xlab('distance from TSS in [bp]')
43 43
 #'# Note: If you want to facet the plot because you have many combinatorial states you
44 44
 #'# can do that with
45
-#'    plotFoldEnrichment(model, genes, region = 'start') +
45
+#'    plotEnrichment(model, genes, region = 'start') +
46 46
 #'    facet_wrap(~ combination)
47 47
 #'  
48 48
 #'# Another form of visualization that shows every TSS in a heatmap
... ...
@@ -65,6 +65,7 @@ NULL
65 65
 #' @param annotations A \code{list()} with \code{\link{GRanges}} objects containing coordinates of multiple annotations The names of the list entries will be used to name the return values.
66 66
 #' @param plot A logical indicating whether the plot or an array with the fold enrichment values is returned.
67 67
 #' @importFrom S4Vectors subjectHits queryHits
68
+#' @importFrom IRanges subsetByOverlaps
68 69
 #' @importFrom reshape2 melt
69 70
 #' @export
70 71
 plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combinations=NULL, marks=NULL, plot=TRUE) {
... ...
@@ -89,7 +90,8 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
89 90
         mark.levels <- marks
90 91
     }
91 92
     genome <- sum(as.numeric(width(bins)))
92
-    feature.lengths <- lapply(annotations, function(x) { sum(as.numeric(width(x))) })
93
+    annotationsAtBins <- lapply(annotations, function(x) { IRanges::subsetByOverlaps(x, bins) })
94
+    feature.lengths <- lapply(annotationsAtBins, function(x) { sum(as.numeric(width(x))) })
93 95
     
94 96
     if (what == 'peaks') {
95 97
         binstates <- dec2bin(bins$state, colnames=hmm$info$ID)
... ...
@@ -98,16 +100,17 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
98 100
     ## Fold enrichment
99 101
     ggplts <- list()
100 102
     folds <- list()
103
+    maxfolds <- list()
101 104
     for (condition in conditions) {
102 105
         if (what == 'combinations') {
103 106
             bins$combination <- mcols(bins)[,paste0('combination.', condition)]
104
-            fold <- array(NA, dim=c(length(annotations), length(comb.levels)), dimnames=list(annotation=names(annotations), combination=comb.levels))
107
+            fold <- array(NA, dim=c(length(annotationsAtBins), length(comb.levels)), dimnames=list(annotation=names(annotationsAtBins), combination=comb.levels))
105 108
             for (icomb in 1:length(comb.levels)) {
106 109
                 mask <- bins$combination == comb.levels[icomb]
107 110
                 bins.mask <- bins[mask]
108 111
                 combstate.length <- sum(as.numeric(width(bins.mask)))
109
-                for (ifeat in 1:length(annotations)) {
110
-                    feature <- annotations[[ifeat]]
112
+                for (ifeat in 1:length(annotationsAtBins)) {
113
+                    feature <- annotationsAtBins[[ifeat]]
111 114
                     ind <- findOverlaps(bins.mask, feature)
112 115
     
113 116
                     binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))]
... ...
@@ -121,7 +124,7 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
121 124
             }
122 125
     
123 126
         } else if (what == 'peaks') {
124
-            fold <- array(NA, dim=c(length(annotations), length(mark.levels)), dimnames=list(annotation=names(annotations), mark=mark.levels))
127
+            fold <- array(NA, dim=c(length(annotationsAtBins), length(mark.levels)), dimnames=list(annotation=names(annotationsAtBins), mark=mark.levels))
125 128
             for (imark in 1:length(mark.levels)) {
126 129
                 mark <- mark.levels[imark]
127 130
                 colmask <- hmm$info$mark == mark
... ...
@@ -133,8 +136,8 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
133 136
                 mask <- binstates.cond
134 137
                 bins.mask <- bins[mask]
135 138
                 combstate.length <- sum(as.numeric(width(bins.mask)))
136
-                for (ifeat in 1:length(annotations)) {
137
-                    feature <- annotations[[ifeat]]
139
+                for (ifeat in 1:length(annotationsAtBins)) {
140
+                    feature <- annotationsAtBins[[ifeat]]
138 141
                     ind <- findOverlaps(bins.mask, feature)
139 142
     
140 143
                     binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))]
... ...
@@ -151,16 +154,22 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
151 154
       
152 155
         if (plot) {
153 156
             df <- reshape2::melt(fold, value.name='foldEnrichment')
157
+            maxfolds[[condition]] <- max(df$foldEnrichment, na.rm=TRUE)
154 158
             if (what == 'combinations') {
155
-                ggplt <- ggplot(df) + geom_tile(aes_string(x='combination', y='annotation', fill='foldEnrichment')) + theme(axis.text.x = element_text(angle=90, hjust=1)) + scale_fill_gradient(low='white', high='blue')
159
+                ggplt <- ggplot(df) + geom_tile(aes_string(x='combination', y='annotation', fill='foldEnrichment'))
156 160
             } else if (what == 'peaks') {
157
-                ggplt <- ggplot(df) + geom_tile(aes_string(x='mark', y='annotation', fill='foldEnrichment')) + theme(axis.text.x = element_text(angle=90, hjust=1)) + scale_fill_gradient(low='white', high='blue')
161
+                ggplt <- ggplot(df) + geom_tile(aes_string(x='mark', y='annotation', fill='foldEnrichment'))
158 162
             }
163
+            ggplt <- ggplt + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))
159 164
             ggplts[[condition]] <- ggplt
160 165
         } else {
161 166
             folds[[condition]] <- fold
162 167
         }
163 168
     }
169
+    if (plot) {
170
+        maxfolds <- unlist(maxfolds)
171
+        ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_fill_gradientn(colors=grDevices::colorRampPalette(c("blue","white","red"))(20), values=c(seq(0,1,length.out=10), seq(1,max(maxfolds, na.rm=TRUE),length.out=10)), rescaler=function(x,...) {x}, oob=identity, limits=c(0,max(maxfolds, na.rm=TRUE))) })
172
+    }
164 173
 
165 174
     if (class(hmm) == class.multivariate.hmm) {
166 175
         if (plot) {
... ...
@@ -183,6 +192,7 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
183 192
 #' @inheritParams enrichmentAtAnnotation
184 193
 #' @param max.rows An integer specifying the number of randomly subsampled rows that are plotted from the \code{annotation} object. This is necessary to avoid crashing for heatmaps with too many rows.
185 194
 #' @importFrom reshape2 melt
195
+#' @importFrom IRanges subsetByOverlaps
186 196
 #' @export
187 197
 plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, max.rows=1000) {
188 198
 
... ...
@@ -190,24 +200,27 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
190 200
     ## Variables
191 201
     bins <- hmm$bins
192 202
     if (class(hmm) == class.combined.multivariate.hmm) {
203
+        conditions <- sub('combination.', '', grep('combination', names(mcols(bins)), value=TRUE))
204
+        comb.levels <- levels(mcols(bins)[,paste0('combination.', conditions[1])])
205
+        ## Create new column combination with all conditions combined
206
+        combinations <- list()
207
+        for (condition in conditions) {
208
+            combinations[[condition]] <- paste0(condition, ":", mcols(bins)[,paste0('combination.', condition)])
209
+        }
210
+        combinations$sep <- ', '
211
+        bins$combination <- factor(do.call(paste, combinations))
193 212
     } else if (class(hmm) == class.multivariate.hmm) {
194
-        # Rename 'combination' to 'combination.' for coherence with combinedMultiHMM
195
-        names(mcols(bins))[grep('combination', names(mcols(bins)))] <- 'combination.'
213
+        comb.levels <- levels(bins$combination)
196 214
     }
197
-    conditions <- sub('combination.', '', grep('combination', names(mcols(bins)), value=TRUE))
198
-    comb.levels <- levels(mcols(bins)[,paste0('combination.', conditions[1])])
199 215
     binsize <- width(bins)[1]
200 216
     around <- round(bp.around.annotation/binsize)
201
-    ## Create new column combination with all conditions combined
202
-    combinations <- list()
203
-    for (condition in conditions) {
204
-        combinations[[condition]] <- paste0(condition, ":", mcols(bins)[,paste0('combination.', condition)])
205
-    }
206
-    combinations$sep <- ', '
207
-    bins$combination <- factor(do.call(paste, combinations))
217
+    
218
+    ## Get RPKM values
219
+    bins$counts <- sweep(bins$counts, MARGIN = 2, STATS = colSums(bins$counts), FUN = '/')
220
+    bins$counts <- bins$counts * 1e6 * 1000/mean(width(bins))
208 221
 
209 222
     # Subsampling for plotting of huge data.frames
210
-    annotation <- subsetByOverlaps(annotation, bins)
223
+    annotation <- IRanges::subsetByOverlaps(annotation, bins)
211 224
     if (length(annotation)>max.rows) {
212 225
         annotation <- sample(annotation, size=max.rows, replace=FALSE)
213 226
     }
... ...
@@ -262,7 +275,9 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
262 275
         panel.grid = element_blank(),
263 276
         panel.border = element_rect(fill='NA'),
264 277
         panel.background = element_rect(fill='white'),
265
-        axis.text.y = element_blank()
278
+        axis.text.y = element_blank(),
279
+        axis.ticks.y = element_blank(),
280
+        axis.line.y = element_blank()
266 281
     )
267 282
     
268 283
     ## Prepare data.frame
... ...
@@ -272,18 +287,18 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
272 287
     comb2keep <- names(num.comb)[num.comb/sum(num.comb) > 0.005]
273 288
     counts <- counts[comb2keep]
274 289
     df <- reshape2::melt(counts)
275
-    names(df) <- c('id','position','counts','track','combination')
290
+    names(df) <- c('id','position','RPKM','track','combination')
276 291
     df$id <- factor(df$id, levels=rev(unique(df$id)))
277 292
     df$combination <- factor(df$combination, levels=unique(df$combination))
278 293
     df$track <- factor(df$track, levels=colnames(bins$counts))
279 294
     
280 295
     ## Plot as heatmap
281 296
     ggplt <- ggplot(df) + geom_tile(aes_string(x='position', y='id', color='combination'))
282
-    ggplt <- ggplt + geom_tile(aes_string(x='position', y='id', fill='counts'), alpha=0.6)
297
+    ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$combination))))
298
+    ggplt <- ggplt + geom_tile(aes_string(x='position', y='id', fill='RPKM'), alpha=0.6)
283 299
     ggplt <- ggplt + facet_wrap( ~ track, nrow=1) + custom_theme
284 300
     ggplt <- ggplt + xlab('distance from annotation in [bp]') + ylab('')
285
-    breaks <- sort(c(0, 10^(0:5), max(df$counts, na.rm = TRUE)))
286
-    ggplt <- ggplt + scale_fill_continuous(trans='log1p', breaks=breaks, labels=breaks, low='white', high='black')
301
+    ggplt <- ggplt + scale_fill_continuous(trans='log1p', low='white', high='black')
287 302
     # Insert horizontal lines
288 303
     y.lines <- sapply(split(df$id, df$combination), function(x) { max(as.integer(x)) })
289 304
     df.lines <- data.frame(y=sort(y.lines[-1]) + 0.5)
... ...
@@ -300,15 +315,19 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
300 315
 #' @inheritParams enrichmentAtAnnotation
301 316
 #' @importFrom reshape2 melt
302 317
 #' @export
303
-plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c("start","inside","end"), num.intervals=20, what='combinations', combinations=NULL, marks=NULL) {
318
+plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c("start","inside","end"), num.intervals=20, what='combinations', combinations=NULL, marks=NULL, statistic='fold') {
304 319
 
305 320
     ## Check user input
306 321
     if ((!what %in% c('combinations','peaks','counts')) | length(what) > 1) {
307 322
         stop("argument 'what' must be one of c('combinations','peaks','counts')")
308 323
     }
324
+    if (!is.null(marks) & what!='peaks') {
325
+        stop("Please set argument 'what=\"peaks\"' if you want to plot marks instead of combinations.")
326
+    }
309 327
   
310 328
     ## Variables
311 329
     hmm <- loadHmmsFromFiles(hmm, check.class=c(class.multivariate.hmm, class.combined.multivariate.hmm))[[1]]
330
+    hmm$bins$counts <- rpkm.matrix(hmm$bins$counts, binsize=mean(width(hmm$bins)))
312 331
     bins <- hmm$bins
313 332
     if (class(hmm) == class.combined.multivariate.hmm) {
314 333
     } else if (class(hmm) == class.multivariate.hmm) {
... ...
@@ -329,14 +348,15 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi
329 348
 
330 349
     if (what %in% c('peaks','counts')) {
331 350
         ### Get fold enrichment
332
-        enrich <- enrichmentAtAnnotation(hmm$bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals)
351
+        enrich <- enrichmentAtAnnotation(hmm$bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals, statistic=statistic)
333 352
     }
334 353
     ggplts <- list()
354
+    maxfolds <- list()
335 355
     for (condition in conditions) {
336 356
         if (what == 'combinations') {
337 357
             ### Get fold enrichment
338 358
             bins$combination <- mcols(bins)[,paste0('combination.', condition)]
339
-            enrich.cond <- enrichmentAtAnnotation(bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals)
359
+            enrich.cond <- enrichmentAtAnnotation(bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals, statistic=statistic)
340 360
         } else {
341 361
             enrich.cond <- enrich
342 362
         }
... ...
@@ -344,15 +364,21 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi
344 364
         df <- reshape2::melt(enrich.cond)
345 365
         df$L1 <- factor(df$L1, levels=c('start','inside','end'))
346 366
         df <- rbind(df[df$L1 == 'start',], df[df$L1 == 'inside',], df[df$L1 == 'end',])
347
-        if (length(region)>=2) {
367
+        if (length(region)>=2 & 'inside' %in% region) {
348 368
             df <- df[!(df$L1 == 'start' & df$lag > 0),]
349 369
             df <- df[!(df$L1 == 'end' & df$lag < 0),]
350 370
             df$position <- apply(data.frame(df$interval, df$lag), 1, max, na.rm = TRUE)
371
+        } else if (length(region)==2 & ! 'inside' %in% region) {
372
+            df <- df[!(df$L1 == 'start' & df$lag > 0),]
373
+            df <- df[!(df$L1 == 'end' & df$lag < 0),]
374
+            df$position <- df$lag
351 375
         } else if (length(region)==1) {
352 376
             df <- df[df$L1 == region,]
353 377
             df$position <- df$lag
354 378
         }
355
-        df$position[df$L1 == 'end'] <- df$position[df$L1 == 'end'] + bp.around.annotation
379
+        if ('inside' %in% region) {
380
+            df$position[df$L1 == 'end'] <- df$position[df$L1 == 'end'] + bp.around.annotation
381
+        }
356 382
         df$position[df$L1 == 'inside'] <- df$position[df$L1 == 'inside'] * bp.around.annotation
357 383
         if (what == 'combinations') {
358 384
             df <- df[df$combination %in% comb.levels,]
... ...
@@ -367,20 +393,42 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi
367 393
 
368 394
         ### Plot
369 395
         if (what == 'combinations') {
370
-            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='combination'), size=2) + ylab('fold enrichment')
396
+            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='combination'), size=2)
397
+            if (statistic == 'fold') {
398
+                ggplt <- ggplt + ylab('fold enrichment')
399
+                ggplt <- ggplt + geom_hline(yintercept=1, lty=2)
400
+            } else if (statistic == 'fraction') {
401
+                ggplt <- ggplt + ylab('fraction')
402
+            }
403
+            ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$combination))))
371 404
         } else if (what == 'peaks') {
372
-            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='mark'), size=2) + ylab('fraction of positions in peak')
405
+            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='mark'), size=2)
406
+            if (statistic == 'fold') {
407
+                ggplt <- ggplt + ylab('fold enrichment')
408
+                ggplt <- ggplt + geom_hline(yintercept=1, lty=2)
409
+            } else if (statistic == 'fraction') {
410
+                ggplt <- ggplt + ylab('fraction')
411
+            }
412
+            ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$mark))))
373 413
         } else if (what == 'counts') {
374
-            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='track'), size=2) + ylab('read count')
414
+            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='track'), size=2) + ylab('RPKM')
415
+            ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$track))))
375 416
         }
376 417
         ggplt <- ggplt + theme_bw() + xlab('distance from annotation in [bp]')
377
-        if (length(region)>=2) {
418
+        if (length(region)>=2 & 'inside' %in% region) {
378 419
             breaks <- c(c(-1, -0.5, 0, 0.5, 1, 1.5, 2) * bp.around.annotation)
379 420
             labels <- c(-bp.around.annotation, -bp.around.annotation/2, '0%', '50%', '100%', bp.around.annotation/2, bp.around.annotation)
380 421
             ggplt <- ggplt + scale_x_continuous(breaks=breaks, labels=labels)
381 422
         }
423
+        maxfolds[[condition]] <- max(df$value, na.rm=TRUE)
382 424
         ggplts[[condition]] <- ggplt
383 425
     }
426
+    maxfolds <- unlist(maxfolds)
427
+    if (statistic == 'fraction' & what %in% c('combinations','peaks')) {
428
+        ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_y_continuous(limits=c(0,1)) })
429
+    } else {
430
+        ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_y_continuous(limits=c(0,max(maxfolds, na.rm=TRUE)*1.1)) })
431
+    }
384 432
     if (class(hmm) == class.multivariate.hmm) {
385 433
         return(ggplts[[1]])
386 434
     } else if (class(hmm) == class.combined.multivariate.hmm) {
... ...
@@ -402,9 +450,10 @@ plotFoldEnrichment <- function(hmm, annotation, bp.around.annotation=10000, regi
402 450
 #' @param region A combination of \code{c('start','inside','end')} specifying the region of the annotation for which the enrichment will be calculated. Select \code{'start'} if you have a point-sized annotation like transcription start sites. Select \code{c('start','inside','end')} if you have long annotations like genes.
403 451
 #' @param what One of \code{c('combinations','peaks','counts')} specifying which statistic to calculate.
404 452
 #' @param num.intervals Number of intervals for enrichment 'inside' of annotation.
453
+#' @param statistic The statistic to calculate. Either 'fold' for fold enrichments or 'fraction' for fraction of bins falling into the annotation.
405 454
 #' @return A \code{list()} containing \code{data.frame()}s for enrichment of combinatorial states and binary states at the start, end and inside of the annotation.
406 455
 #' @importFrom S4Vectors as.factor subjectHits queryHits
407
-enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=10000, region=c('start','inside','end'), what=c('combinations','peaks','counts'), num.intervals=21) {
456
+enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=10000, region=c('start','inside','end'), what=c('combinations','peaks','counts'), num.intervals=21, statistic='fold') {
408 457
 
409 458
     ## Check user input
410 459
     if ((!what %in% c('combinations','peaks','counts')) | length(what) > 1) {
... ...
@@ -427,6 +476,7 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
427 476
         binstates <- dec2bin(bins$state, colnames=info$ID)
428 477
         # Remove replicates
429 478
         binstates <- binstates[ ,info.dedup$ID]
479
+        colsums.binstates <- colSums(binstates)
430 480
     }
431 481
     if ('counts' %in% what) {
432 482
         counts <- bins$counts
... ...
@@ -455,14 +505,25 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
455 505
             index.inside.minus <- index.inside.minus[!is.na(index.inside.minus)]
456 506
             index <- c(index.inside.plus, index.inside.minus)
457 507
             index <- index[index>0 & index<=length(bins)] # index could cross chromosome boundaries, but we risk it
458
-            if ('peaks' %in% what) binstates.inside[as.character(interval),] <- colMeans(binstates[index,])
508
+            if ('peaks' %in% what) {
509
+                if (statistic == 'fraction') {
510
+                    binstates.inside[as.character(interval),] <- colSums(binstates[index,]) / length(index) # or colMeans
511
+                } else if (statistic == 'fold') {
512
+                    binstates.inside[as.character(interval),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins)
513
+                }
514
+            }
459 515
             if ('combinations' %in% what) {
460
-                fold <- table(combinations[index]) / tcombinations / length(annotation) * length(bins) # fold enrichment
461
-#                 fold <- table(combinations[index]) / length(annotation) # percentage enrichment
516
+                if (statistic == 'fraction') {
517
+                    fold <- table(combinations[index]) / length(index)
518
+                } else if (statistic == 'fold') {
519
+                    fold <- table(combinations[index]) / length(index) / tcombinations * length(bins) # fold enrichment
520
+                }
462 521
                 fold[is.na(fold)] <- 0
463 522
                 combinations.inside[as.character(interval),] <- fold
464 523
             }
465
-            if ('counts' %in% what) counts.inside[as.character(interval),] <- colMeans(counts[index,])
524
+            if ('counts' %in% what) {
525
+                counts.inside[as.character(interval),] <- colMeans(counts[index,])
526
+            }
466 527
         }
467 528
         if ('peaks' %in% what) {
468 529
             enrich$peaks$inside <- binstates.inside
... ...
@@ -491,14 +552,25 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
491 552
         for (ilag in -lag:lag) {
492 553
             index <- c(index.start.plus+ilag, index.start.minus-ilag)
493 554
             index <- index[index>0 & index<=length(bins)]
494
-            if ('peaks' %in% what) binstates.start[as.character(ilag),] <- colMeans(binstates[index,])
555
+            if ('peaks' %in% what) {
556
+                if (statistic == 'fraction') {
557
+                    binstates.start[as.character(ilag),] <- colSums(binstates[index,]) / length(index)
558
+                } else if (statistic == 'fold') {
559
+                    binstates.start[as.character(ilag),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins)
560
+                }
561
+            }
495 562
             if ('combinations' %in% what) {
496
-                fold <- table(combinations[index]) / tcombinations / length(annotation) * length(bins) # fold enrichment
497
-#                 fold <- table(combinations[index]) / length(annotation) # percentage enrichment
563
+                if (statistic == 'fraction') {
564
+                    fold <- table(combinations[index]) / length(index)
565
+                } else if (statistic == 'fold') {
566
+                    fold <- table(combinations[index]) / length(index) / tcombinations * length(bins) # fold enrichment
567
+                }
498 568
                 fold[is.na(fold)] <- 0
499 569
                 combinations.start[as.character(ilag),] <- fold
500 570
             }
501
-            if ('counts' %in% what) counts.start[as.character(ilag),] <- colMeans(counts[index,])
571
+            if ('counts' %in% what) {
572
+                counts.start[as.character(ilag),] <- colMeans(counts[index,])
573
+            }
502 574
         }
503 575
         if ('peaks' %in% what) {
504 576
             rownames(binstates.start) <- as.numeric(rownames(binstates.start)) * binsize
... ...
@@ -530,14 +602,25 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
530 602
         for (ilag in -lag:lag) {
531 603
             index <- c(index.end.plus+ilag, index.end.minus-ilag)
532 604
             index <- index[index>0 & index<=length(bins)]
533
-            if ('peaks' %in% what) binstates.end[as.character(ilag),] <- colMeans(binstates[index,])
605
+            if ('peaks' %in% what) {
606
+                if (statistic == 'fraction') {
607
+                    binstates.end[as.character(ilag),] <- colSums(binstates[index,]) / length(index)
608
+                } else if (statistic == 'fold') {
609
+                    binstates.end[as.character(ilag),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins)
610
+                }
611
+            }
534 612
             if ('combinations' %in% what) {
535
-                fold <- table(combinations[index]) / tcombinations / length(annotation) * length(bins) # fold enrichment
536
-#                 fold <- table(combinations[index]) / length(annotation) # percentage enrichment
613
+                if (statistic == 'fraction') {
614
+                    fold <- table(combinations[index]) / length(index)
615
+                } else if (statistic == 'fold') {
616
+                    fold <- table(combinations[index]) / length(index) / tcombinations * length(bins) # fold enrichment
617
+                }
537 618
                 fold[is.na(fold)] <- 0
538 619
                 combinations.end[as.character(ilag),] <- fold
539 620
             }
540
-            if ('counts' %in% what) counts.end[as.character(ilag),] <- colMeans(counts[index,])
621
+            if ('counts' %in% what) {
622
+                counts.end[as.character(ilag),] <- colMeans(counts[index,])
623
+            }
541 624
         }
542 625
         if ('peaks' %in% what) {
543 626
             rownames(binstates.end) <- as.numeric(rownames(binstates.end)) * binsize
... ...
@@ -14,35 +14,35 @@ insertchr <- function(gr) {
14 14
 #=====================================================
15 15
 # Export binned data
16 16
 #=====================================================
17
-#' Export genome browser viewable files
18
-#'
19
-#' Export read counts as genome browser viewable file
20
-#'
21
-#' Export read counts from \code{\link{binned.data}} as a file which can be uploaded into a genome browser. Read counts are exported in WIGGLE format (.wig.gz).
22
-#'
23
-#' @author Aaron Taudt
24
-#' @param binned.data.list A \code{list()} of \code{\link{binned.data}} objects or vector of files that contain such objects.
25
-#' @param filename The name of the file that will be written. The ending ".wig.gz" for read counts will be appended. Any existing file will be overwritten.
26
-#' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
27
-#' @param separate.files A logical indicating whether or not to produce separate files for each object in \code{binned.data.list}.
28
-#' @return \code{NULL}
29
-#' @seealso \code{\link{exportUnivariates}}, \code{\link{exportMultivariate}}
30
-#' @importFrom utils write.table
31
-#' @importFrom grDevices col2rgb
32
-#' @export
33
-#' @examples
34
-#'## Get an example BAM file
35
-#'file <- system.file("extdata", "euratrans",
36
-#'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
37
-#'                        package="chromstaRData")
38
-#'## Bin the file into bin size 1000bp
39
-#'data(rn4_chrominfo)
40
-#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
41
-#'                   chromosomes='chr12')
42
-#'## Export the binned read counts
43
-#'exportBinnedData(list(binned), filename=tempfile())
44
-#'
45
-exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.files=TRUE) {
17
+# #' Export genome browser viewable files
18
+# #'
19
+# #' Export read counts (RPKM) as genome browser viewable file
20
+# #'
21
+# #' Export read counts from \code{\link{binned.data}} as a file which can be uploaded into a genome browser. Read counts are exported in WIGGLE format as RPKM (.wig.gz).
22
+# #'
23
+# #' @author Aaron Taudt
24
+# #' @param binned.data.list A \code{list()} of \code{\link{binned.data}} objects or vector of files that contain such objects.
25
+# #' @param filename The name of the file that will be written. The ending ".wig.gz" for read counts will be appended. Any existing file will be overwritten.
26
+# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
27
+# #' @param separate.files A logical indicating whether or not to produce separate files for each object in \code{binned.data.list}.
28
+# #' @param trackname Name that will be used in the "track name" field of the file.
29
+# #' @return \code{NULL}
30
+# #' @seealso \code{\link{exportUnivariates}}, \code{\link{exportMultivariate}}
31
+# #' @importFrom utils write.table
32
+# #' @importFrom grDevices col2rgb
33
+# #' @examples
34
+# #'## Get an example BAM file
35
+# #'file <- system.file("extdata", "euratrans",
36
+# #'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
37
+# #'                        package="chromstaRData")
38
+# #'## Bin the file into bin size 1000bp
39
+# #'data(rn4_chrominfo)
40
+# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
41
+# #'                   chromosomes='chr12')
42
+# #'## Export the binned read counts
43
+# #'exportBinnedData(list(binned), filename=tempfile())
44
+# #'
45
+exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.files=TRUE, trackname=NULL) {
46 46
 
47 47
     ## Load data
48 48
     binned.data.list <- loadHmmsFromFiles(binned.data.list)
... ...
@@ -78,12 +78,18 @@ exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.f
78 78
         binsize <- width(b[1])
79 79
         name <- names(binned.data.list)[imod]
80 80
         if (header) {
81
-            cat(paste0('track type=wiggle_0 name="read count for ',name,'" description="read count for ',name,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE)
81
+            if (is.null(trackname)) {
82
+                trackname.string <- paste0("read count for ", name)
83
+            } else {
84
+                trackname.string <- paste0("read count for ", name, ", ", trackname)
85
+            }
86
+            cat(paste0('track type=wiggle_0 name="',trackname.string,'" description="',trackname.string,'" visibility=full autoScale=on color=',readcol,' maxHeightPixels=100:50:20 graphType=bar priority=',priority,'\n'), file=filename.gz, append=TRUE)
82 87
         }
83 88
         # Write read data
89
+        b$counts <- rpkm.vector(b$counts, binsize=mean(width(b))) # RPKM normalization
84 90
         for (chrom in unique(b$chromosome)) {
85 91
             cat(paste0("fixedStep chrom=",chrom," start=1 step=",binsize," span=",binsize,"\n"), file=filename.gz, append=TRUE)
86
-            utils::write.table(mcols(b[b$chromosome==chrom])$counts, file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t')
92
+            utils::write.table(b[b$chromosome==chrom]$counts, file=filename.gz, append=TRUE, row.names=FALSE, col.names=FALSE, sep='\t')
87 93
         }
88 94
         if (separate.files) {
89 95
             close(filename.gz)
... ...
@@ -98,41 +104,41 @@ exportBinnedData <- function(binned.data.list, filename, header=TRUE, separate.f
98 104
 #=====================================================
99 105
 # Export univariate HMMs
100 106
 #=====================================================
101
-#' Export genome browser viewable files
102
-#'
103
-#' Export univariate peak-calls and read counts as genome browser viewable file
104
-#'
105
-#' Export \code{\link{uniHMM}} objects as files which can be uploaded into a genome browser. Peak-calls are exported in BED format (.bed.gz) and read counts are exported in WIGGLE format (.wig.gz).
106
-#'
107
-#' @author Aaron Taudt
108
-#' @param hmm.list A \code{list()} of \code{\link{uniHMM}} objects or vector of files that contain such objects.
109
-#' @param filename The name of the file that will be written. The appropriate ending will be appended, either ".bed.gz" for peak-calls or ".wig.gz" for read counts. Any existing file will be overwritten.
110
-#' @param what A character vector specifying what will be exported. Supported are \code{c('peaks', 'counts')}.
111
-#' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
112
-#' @param separate.files A logical indicating whether or not to produce separate files for each hmm in \code{hmm.list}.
113
-#' @return \code{NULL}
114
-#' @seealso \code{\link{exportBinnedData}}, \code{\link{exportMultivariate}}
115
-#' @export
116
-#' @examples
117
-#'## Get an example BAM file
118
-#'file <- system.file("extdata", "euratrans",
119
-#'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
120
-#'                        package="chromstaRData")
121
-#'## Bin the file into bin size 1000bp
122
-#'data(rn4_chrominfo)
123
-#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
124
-#'                   chromosomes='chr12')
125
-#'## Fit the univariate Hidden Markov Model
126
-#'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1)
127
-#'## Export
128
-#'exportUnivariates(list(hmm), filename=tempfile(), what=c('peaks','counts'))
129
-#'
130
-exportUnivariates <- function(hmm.list, filename, what=c('peaks', 'counts'), header=TRUE, separate.files=TRUE) {
107
+# #' Export genome browser viewable files
108
+# #'
109
+# #' Export univariate peak-calls and read counts (RPKM) as genome browser viewable file
110
+# #'
111
+# #' Export \code{\link{uniHMM}} objects as files which can be uploaded into a genome browser. Peak-calls are exported in BED format (.bed.gz) and read counts are exported in WIGGLE format as RPKM (.wig.gz).
112
+# #'
113
+# #' @author Aaron Taudt
114
+# #' @param hmm.list A \code{list()} of \code{\link{uniHMM}} objects or vector of files that contain such objects.
115
+# #' @param filename The name of the file that will be written. The appropriate ending will be appended, either ".bed.gz" for peak-calls or ".wig.gz" for read counts. Any existing file will be overwritten.
116
+# #' @param what A character vector specifying what will be exported. Supported are \code{c('peaks', 'counts')}.
117
+# #' @param header A logical indicating whether the output file will have a heading track line (\code{TRUE}) or not (\code{FALSE}).
118
+# #' @param separate.files A logical indicating whether or not to produce separate files for each hmm in \code{hmm.list}.
119
+# #' @param trackname Name that will be used in the "track name" field of the file.
120
+# #' @return \code{NULL}
121
+# #' @seealso \code{\link{exportBinnedData}}, \code{\link{exportMultivariate}}
122
+# #' @examples
123
+# #'## Get an example BAM file
124
+# #'file <- system.file("extdata", "euratrans",
125
+# #'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
126
+# #'                        package="chromstaRData")
127
+# #'## Bin the file into bin size 1000bp
128
+# #'data(rn4_chrominfo)
129
+# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
130
+# #'                   chromosomes='chr12')
131
+# #'## Fit the univariate Hidden Markov Model
132
+# #'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1)
133
+# #'## Export
134
+# #'exportUnivariates(list(hmm), filename=tempfile(), what=c('peaks','counts'))
135
+# #'
136
+exportUnivariates <- function(hmm.list, filename, what=c('peaks', 'counts'), header=TRUE, separate.files=TRUE, trackname=NULL) {
131 137
     if ('peaks' %in% what) {
132
-        exportUnivariatePeaks(hmm.list, filename=paste0(filename, '_peaks'), header=header, separate.files=separate.files)
138
+        exportUnivariatePeaks(hmm.list, filename=paste0(filename, '_peaks'), header=header, separate.files=separate.files, trackname=trackname)
133 139
     }
134 140
     if ('counts' %in% what) {
135
-        exportUnivariateCounts(hmm.list, filename=paste0(filename, '_counts'), header=header, separate.files=separate.files)
141
+        exportUnivariateCounts(hmm.list, filename=paste0(filename, '_counts'), header=header, separate.files=separate.files, trackname=trackname)
136 142
     }
137 143
 }
138 144
 
... ...
@@ -141,14 +147,14 @@ exportUnivariates <- function(hmm.list, filename, what=c('peaks', 'counts'), hea
141 147
 #----------------------------------------------------
142 148
 #' @importFrom utils write.table
143 149
 #' @importFrom grDevices col2rgb
144
-exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.files=TRUE) {
150
+exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.files=TRUE, trackname=NULL) {
145 151
 
146 152
     ## Load models
147 153
     hmm.list <- loadHmmsFromFiles(hmm.list, check.class=class.univariate.hmm)
148 154
 
149 155
     ## Transform to GRanges
150
-    hmm.grl <- lapply(hmm.list, '[[', 'segments')
151
-    hmm.grl <- lapply(hmm.grl, insertchr)
156
+    peaklist <- lapply(hmm.list, '[[', 'peaks')
157
+    peaklist <- lapply(peaklist, insertchr)
152 158
 
153 159
     # Variables
154 160
     nummod <- length(hmm.list)
... ...
@@ -157,16 +163,6 @@ exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.file
157 163
         filename.gz <- gzfile(filename, 'w')
158 164
     }
159 165
 
160
-    # Generate the colors
161
-    colors <- getStateColors(levels(hmm.grl[[1]]$state))
162
-    RGBs <- t(grDevices::col2rgb(colors))
163
-    RGBlist <- list()
164
-    for (i1 in 1:3) {
165
-        RGBlist[[i1]] <- RGBs[,i1]
166
-    }
167
-    RGBlist$sep <- ','
168
-    RGBs <- do.call(paste, RGBlist)
169
-
170 166
     # Write first line to file
171 167
     if (!separate.files) {
172 168
         ptm <- startTimedMessage('Writing to file ',filename, ' ...')
... ...
@@ -187,24 +183,33 @@ exportUnivariatePeaks <- function(hmm.list, filename, header=TRUE, separate.file
187 183
             ptm <- startTimedMessage('Writing to file ',filename.sep, ' ...')
188 184
             cat("", file=filename.gz)
189 185
         }
190
-        hmm.gr <- hmm.grl[[imod]]
186
+        peaks <- peaklist[[imod]]
191 187
         priority <- 51 + 4*imod
192 188
         if (header) {
193
-            cat(paste0("track name=\"univariate calls for ",ID,"\" description=\"univariate calls for ",ID,"\" visibility=1 itemRgb=On priority=",priority,"\n"), file=filename.gz, append=TRUE)
189
+            if (is.null(trackname)) {
190
+                trackname.string <- paste0("univariate peak calls for ", ID)
191
+            } else {
192
+                trackname.string <- paste0("univariate peak calls for ", ID, ", ", trackname)
193
+            }
194
+            cat(paste0("track name=\"",trackname.string,"\" description=\"",trackname.string,"\" visibility=1 itemRgb=On priority=",priority,"\n"), file=filename.gz, append=TRUE)
194 195
         }
195
-        if (is.null(hmm.gr$score)) {
196
-            hmm.gr$score <- 0
196
+        if (is.null(peaks$peakScores)) {
197
+            peaks$peakScores <- 0
197 198
         }
198
-        collapsed.calls <- as.data.frame(hmm.gr)[c('chromosome','start','end','state','score')]
199
+        df <- as.data.frame(peaks)
200
+        df$peakNumber <- paste0('peak_', 1:nrow(df))
201
+        df$strand <- sub('\\*', '.', df$strand)
202
+        df <- df[,c('chromosome','start','end','peakNumber','peakScores','strand')]
199 203
         # Make score integer
200
-        collapsed.calls$score <- round(collapsed.calls$score*1000)
201
-        itemRgb <- RGBs[as.character(collapsed.calls$state)]
202
-        numsegments <- nrow(collapsed.calls)
203
-        df <- cbind(collapsed.calls, strand=rep(".",numsegments), thickStart=collapsed.calls$start, thickEnd=collapsed.calls$end, itemRgb=itemRgb)
204
+        df$peakScores <- round(df$peakScores*1000)
205
+        numsegments <- nrow(df)
206
+        df <- cbind(df, thickStart=df$start, thickEnd=df$end)
204 207
         # Convert from 1-based closed to 0-based half open
205 208
         df$start <- df$start - 1
206 209
         df$thickStart <- df$thickStart - 1
207
-        df <- df[df$state=='modified',]
210
+        # Colors
211
+        RGB <- t(grDevices::col2rgb(getStateColors('modified')))
212
+        df$itemRgb <- apply(RGB,1,paste,collapse=",")
208 213
         if (nrow(df) == 0) {