Browse code

Updated to version 1.1.2

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/chromstaR@126431 bc3139a8-67e5-0310-9ffc-ced21a209358

Aaron Taudt authored on 03/02/2017 14:09:56
Showing 40 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: chromstaR
2 2
 Type: Package
3 3
 Title: Combinatorial and Differential Chromatin State Analysis for ChIP-Seq Data
4
-Version: 1.1.1
4
+Version: 1.1.2
5 5
 Date: 2016-10-19
6 6
 Author: Aaron Taudt, Maria Colome Tatche, Matthias Heinig, Minh Anh Nguyen
7 7
 Maintainer: Aaron Taudt <aaron.taudt@gmail.com>
... ...
@@ -25,7 +25,8 @@ Imports:
25 25
     reshape2,
26 26
     Rsamtools,
27 27
     GenomicAlignments,
28
-    bamsignals
28
+    bamsignals,
29
+    mvtnorm
29 30
 Suggests:
30 31
     knitr,
31 32
     BiocStyle,
... ...
@@ -36,5 +37,5 @@ License: Artistic-2.0
36 37
 LazyLoad: yes
37 38
 VignetteBuilder: knitr
38 39
 Packaged: 2014-05-03 00:00:00 CET+1; Taudt
39
-biocViews: Software, DifferentialPeakCalling, HiddenMarkovModel, ChIPSeq, MultipleComparison
40
+biocViews: Software, DifferentialPeakCalling, HiddenMarkovModel, ChIPSeq, HistoneModification, MultipleComparison
40 41
 RoxygenNote: 5.0.1
... ...
@@ -1,5 +1,8 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
+S3method(print,combinedMultiHMM)
4
+S3method(print,multiHMM)
5
+S3method(print,uniHMM)
3 6
 export(Chromstar)
4 7
 export(bin2dec)
5 8
 export(binReads)
... ...
@@ -7,6 +10,7 @@ export(callPeaksMultivariate)
7 10
 export(callPeaksReplicates)
8 11
 export(callPeaksUnivariate)
9 12
 export(changeFDR)
13
+export(changePostCutoff)
10 14
 export(collapseBins)
11 15
 export(combineMultivariates)
12 16
 export(dec2bin)
... ...
@@ -31,6 +35,7 @@ export(plotHistogram)
31 35
 export(readBamFileAsGRanges)
32 36
 export(readBedFileAsGRanges)
33 37
 export(readCustomBedFile)
38
+export(removeCondition)
34 39
 export(stateBrewer)
35 40
 export(transitionFrequencies)
36 41
 export(unis2pseudomulti)
... ...
@@ -57,9 +62,11 @@ importFrom(S4Vectors,runmean)
57 62
 importFrom(S4Vectors,subjectHits)
58 63
 importFrom(bamsignals,bamCount)
59 64
 importFrom(grDevices,col2rgb)
65
+importFrom(grDevices,rgb2hsv)
60 66
 importFrom(graphics,hist)
61 67
 importFrom(methods,as)
62 68
 importFrom(methods,is)
69
+importFrom(mvtnorm,rmvnorm)
63 70
 importFrom(reshape2,melt)
64 71
 importFrom(stats,aggregate)
65 72
 importFrom(stats,cutree)
... ...
@@ -1,3 +1,15 @@
1
+CHANGES IN VERSION 1.1.2
2
+------------------------
3
+
4
+NEW FEATURES
5
+
6
+    o Proper print() methods for all objects.
7
+
8
+BUG FIXES
9
+
10
+    o Fixed a bug where chromosomes with a single bin where making problems.
11
+
12
+
1 13
 CHANGES IN VERSION 1.1.1
2 14
 ------------------------
3 15
 
... ...
@@ -1,4 +1,4 @@
1
-#' Wrapper function for the \code{\link{chromstaR}} package
1
+#' Wrapper function for the \pkg{\link{chromstaR}} package
2 2
 #' 
3 3
 #' This function performs \code{\link[chromstaR:binReads]{binning}}, \code{\link[chromstaR:callPeaksUnivariate]{univariate peak calling}} and \code{\link[chromstaR:callPeaksMultivariate]{multivariate peak calling}} from a list of input files.
4 4
 #' 
... ...
@@ -7,7 +7,7 @@
7 7
 #' Convert aligned reads from .bam or .bed(.gz) files into read counts in equidistant windows (bins). This function uses \code{\link[GenomicRanges]{countOverlaps}} to calculate the read counts, or alternatively \code{\link[bamsignals]{bamProfile}} if option \code{use.bamsignals} is set (only effective for .bam files).
8 8
 #'
9 9
 #' @aliases binning
10
-#' @param file A file with aligned reads. Alternatively a \code{\link{GRanges}} with aligned reads if format is set to 'GRanges'.
10
+#' @param file A file with aligned reads. Alternatively a \code{\link{GRanges}} with aligned reads.
11 11
 #' @param experiment.table An \code{\link{experiment.table}} containing the supplied \code{file}. This is necessary to uniquely identify the file in later steps of the workflow. Set to \code{NULL} if you don't have it (not recommended).
12 12
 #' @inheritParams readBamFileAsGRanges
13 13
 #' @inheritParams readBedFileAsGRanges
... ...
@@ -17,6 +17,7 @@
17 17
 #' @param variable.width.reference A BAM file that is used as reference to produce variable width bins. See \code{\link{variableWidthBins}} for details.
18 18
 #' @param chromosomes If only a subset of the chromosomes should be binned, specify them here.
19 19
 #' @param use.bamsignals If \code{TRUE} the \pkg{\link[bamsignals]{bamsignals}} package is used for parsing of BAM files. This gives tremendous speed advantage for only one binsize but linearly increases for multiple binsizes, while \code{use.bamsignals=FALSE} has a binsize dependent runtime and might be faster if many binsizes are calculated.
20
+#' @param format One of \code{c('bed','bam','GRanges')} or \code{NULL} if the format is to be determined automatically.
20 21
 #' @return If only one bin size was specified for option \code{binsizes}, the function returns a single \code{\link{GRanges}} object with meta data column 'counts' that contains the read count. If multiple \code{binsizes} were specified , the function returns a named \code{list()} of \link{GRanges} objects.
21 22
 #' @importFrom Rsamtools BamFile indexBam
22 23
 #' @importFrom bamsignals bamCount
... ...
@@ -35,14 +36,21 @@
35 36
 #'                   chromosomes='chr12')
36 37
 #'print(binned)
37 38
 #'
38
-binReads <- function(file, experiment.table=NULL, assembly, bamindex=file, chromosomes=NULL, pairedEndReads=FALSE, min.mapq=10, remove.duplicate.reads=TRUE, max.fragment.width=1000, blacklist=NULL, binsizes=1000, reads.per.bin=NULL, bins=NULL, variable.width.reference=NULL, use.bamsignals=TRUE) {
39
+binReads <- function(file, experiment.table=NULL, assembly, bamindex=file, chromosomes=NULL, pairedEndReads=FALSE, min.mapq=10, remove.duplicate.reads=TRUE, max.fragment.width=1000, blacklist=NULL, binsizes=1000, reads.per.bin=NULL, bins=NULL, variable.width.reference=NULL, use.bamsignals=TRUE, format=NULL) {
39 40
 
40 41
     ## Determine format
41
-    if (is.character(file)) {
42
-        file.clean <- sub('\\.gz$','', file)
43
-        format <- rev(strsplit(file.clean, '\\.')[[1]])[1]
44
-    } else if (class(file)=='GRanges') {
45
-        format <- 'GRanges'
42
+    if (is.null(format)) {
43
+        if (is.character(file)) {
44
+            file.clean <- sub('\\.gz$','', file)
45
+            format <- rev(strsplit(file.clean, '\\.')[[1]])[1]
46
+        } else if (class(file)=='GRanges') {
47
+            format <- 'GRanges'
48
+        } else {
49
+            stop("Could not determine format automatically. Please specify it via the 'format' parameter.")
50
+        }
51
+    }
52
+    if (! format %in% c('bed','bam','GRanges')) {
53
+        stop("Unknown format. Needs to be one of 'bed', 'bam' or 'GRanges'.")
46 54
     }
47 55
 
48 56
     if (format=='bed') {
... ...
@@ -111,6 +119,8 @@ binReads <- function(file, experiment.table=NULL, assembly, bamindex=file, chrom
111 119
         ## GRanges (1-based)
112 120
         data <- file
113 121
         chrom.lengths <- seqlengths(data)
122
+    } else {
123
+        stop("Unknown file format.")
114 124
     }
115 125
 
116 126
     ## Select chromosomes to bin
... ...
@@ -78,6 +78,11 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
78 78
         ### Make return object ###
79 79
             result <- list()
80 80
             result$info <- hmm$info
81
+            if (is.null(result$info)) {
82
+                n <- 1
83
+                result$info <- data.frame(file=rep(NA, n), mark=1:n, condition=1:n, replicate=1, pairedEndReads=rep(NA, n), controlFiles=rep(NA, n))
84
+                result$info$ID <- paste0(result$info$mark, '-', result$info$condition, '-rep', result$info$replicate)
85
+            }
81 86
         ## Bin coordinates, posteriors and states
82 87
             result$bins <- hmm$bins
83 88
             result$bins$score <- NULL
... ...
@@ -133,7 +138,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
133 138
     p <- prepareMultivariate(hmms, use.states=use.states, max.states=max.states, chromosomes=chromosomes)
134 139
 
135 140
     if (is.null(chromosomes)) {
136
-        chromosomes <- seqlevels(p$bins)
141
+        chromosomes <- intersect(seqlevels(p$bins), unique(seqnames(p$bins)))
137 142
     }
138 143
 
139 144
     ## Run multivariate per chromosome
... ...
@@ -167,7 +172,7 @@ callPeaksMultivariate <- function(hmms, use.states, max.states=NULL, per.chrom=T
167 172
             for (chrom in chromosomes) {
168 173
                 ptm <- startTimedMessage("Chromosome = ", chrom, "\n")
169 174
                 bins <- p$bins[seqnames(p$bins)==chrom]
170
-                model <- suppressMessages( runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity) )
175
+                model <- runMultivariate(bins=bins, info=p$info, comb.states=p$comb.states, use.states=p$use.states, distributions=p$distributions, weights=p$weights, correlationMatrix=p$correlationMatrix, correlationMatrixInverse=p$correlationMatrixInverse, determinant=p$determinant, max.iter=max.iter, max.time=max.time, eps=eps, num.threads=1, keep.posteriors=keep.posteriors, keep.densities=keep.densities, verbosity=verbosity)
171 176
                 message("Time spent for chromosome = ", chrom, ":", appendLF=FALSE)
172 177
                 stopTimedMessage(ptm)
173 178
                 models[[chrom]] <- model
... ...
@@ -396,6 +401,11 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
396 401
     }
397 402
     info <- do.call(rbind, info)
398 403
     rownames(info) <- NULL
404
+    if (is.null(info)) {
405
+        n <- nummod
406
+        info <- data.frame(file=rep(NA, n), mark=1:n, condition=1:n, replicate=rep(1, n), pairedEndReads=rep(NA, n), controlFiles=rep(NA, n))
407
+        info$ID <- paste0(info$mark, '-', info$condition, '-rep', info$replicate)
408
+    }
399 409
     bins$counts <- counts
400 410
     colnames(bins$counts) <- info$ID
401 411
     maxcounts <- max(bins$counts)
... ...
@@ -507,7 +517,8 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
507 517
         }
508 518
         temp <- tryCatch({
509 519
             if (nrow(z.temp) > 100) {
510
-                correlationMatrix[,,istate] <- cor(z.temp)
520
+                correlationMatrix[,,istate] <- suppressWarnings( cor(z.temp) )
521
+                correlationMatrix[,,istate][is.na(correlationMatrix[,,istate])] <- 0
511 522
                 determinant[istate] <- det( correlationMatrix[,,istate] )
512 523
                 correlationMatrixInverse[,,istate] <- chol2inv(chol(correlationMatrix[,,istate]))
513 524
             } else {
... ...
@@ -516,12 +527,10 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
516 527
                 correlationMatrixInverse[,,istate] <- diag(nummod) # solve(diag(x)) == diag(x)
517 528
             }
518 529
             0
519
-        }, warning = function(war) {
520
-            1
521 530
         }, error = function(err) {
522
-            1
531
+            2
523 532
         })
524
-        if (temp!=0) {
533
+        if (temp==2) {
525 534
             correlationMatrix[,,istate] <- diag(nummod)
526 535
             determinant[istate] <- 1
527 536
             correlationMatrixInverse[,,istate] <- diag(nummod)
... ...
@@ -572,3 +581,13 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
572 581
     return(out)
573 582
 }
574 583
 
584
+
585
+### Get real transition probabilities ###
586
+transProbs <- function(model) {
587
+    bins <- model$bins
588
+    df <- data.frame(from = bins$combination[-length(bins)], to = bins$combination[-1])
589
+    t <- table(df)
590
+    t <- t[rownames(model$transitionProbs), colnames(model$transitionProbs)]
591
+    t <- sweep(t, MARGIN = 1, STATS = rowSums(t), FUN = '/')
592
+    return(t)
593
+}
... ...
@@ -64,8 +64,8 @@ callPeaksReplicates <- function(hmm.list, max.states=32, force.equal=FALSE, eps=
64 64
 
65 65
         ## Univariate replicateInfo
66 66
         ids <- sapply(hmms, function(x) { x$info$ID })
67
-        weight.univariate <- unlist(lapply(hmms, function(x) { x$weights['modified'] }))
68
-        total.count <- unlist(lapply(hmms, function(x) { sum(x$bins$counts) }))
67
+        weight.univariate <- sapply(hmms, function(x) { x$weights['modified'] })
68
+        total.count <- sapply(hmms, function(x) { sum(x$bins$counts) })
69 69
         info.df <- data.frame(total.count=total.count, weight.univariate=weight.univariate)
70 70
         if (!is.null(unlist(ids))) {
71 71
             rownames(info.df) <- ids
... ...
@@ -57,7 +57,7 @@ callPeaksUnivariate <- function(binned.data, input.data=NULL, prefit.on.chr=NULL
57 57
     }
58 58
     if (!is.null(input.data)) {
59 59
         if (class(input.data) == 'character') { 
60
-            message("Loading input file ",input.data)
60
+            message("Loading input file(s) ", paste0(input.data, collapse=', '))
61 61
             input.datas <- loadHmmsFromFiles(input.data)
62 62
             input.data <- input.datas[[1]]
63 63
             if (length(input.datas) > 1) {
... ...
@@ -8,24 +8,25 @@
8 8
 #' @param model A \code{\link{uniHMM}} or \code{\link{multiHMM}} object with posteriors.
9 9
 #' @param post.cutoff A vector of posterior cutoff values between 0 and 1 the same length as \code{ncol(model$bins$posteriors)}. If only one value is given, it will be reused for all columns. Values close to 1 will yield more stringent peak calls with lower false positive but higher false negative rate.
10 10
 #' @return The input object is returned with adjusted peak calls.
11
-# #' @examples
12
-# #'## Get an example BAM file
13
-# #'file <- system.file("extdata", "euratrans",
14
-# #'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
15
-# #'                        package="chromstaRData")
16
-# #'## Bin the file into bin size 1000bp
17
-# #'data(rn4_chrominfo)
18
-# #'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
19
-# #'                   chromosomes='chr12')
20
-# #'## Fit the univariate Hidden Markov Model
21
-# #'# !Keep posteriors to change the post.cutoff later!
22
-# #'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1,
23
-# #'                           keep.posteriors=TRUE)
24
-# #'## Compare fits with different post.cutoffs
25
-# #'plotHistogram(changePostCutoff(hmm, post.cutoff=0.01)) + ylim(0,0.25)
26
-# #'plotHistogram(hmm) + ylim(0,0.25)
27
-# #'plotHistogram(changePostCutoff(hmm, post.cutoff=0.99)) + ylim(0,0.25)
28
-# #'
11
+#' @export
12
+#' @examples
13
+#'## Get an example BAM file
14
+#'file <- system.file("extdata", "euratrans",
15
+#'                       "lv-H3K27me3-BN-male-bio2-tech1.bam",
16
+#'                        package="chromstaRData")
17
+#'## Bin the file into bin size 1000bp
18
+#'data(rn4_chrominfo)
19
+#'binned <- binReads(file, assembly=rn4_chrominfo, binsizes=1000,
20
+#'                   chromosomes='chr12')
21
+#'## Fit the univariate Hidden Markov Model
22
+#'# !Keep posteriors to change the post.cutoff later!
23
+#'hmm <- callPeaksUnivariate(binned, max.time=60, eps=1,
24
+#'                           keep.posteriors=TRUE)
25
+#'## Compare fits with different post.cutoffs
26
+#'plotHistogram(changePostCutoff(hmm, post.cutoff=0.01)) + ylim(0,0.25)
27
+#'plotHistogram(hmm) + ylim(0,0.25)
28
+#'plotHistogram(changePostCutoff(hmm, post.cutoff=0.99)) + ylim(0,0.25)
29
+#'
29 30
 changePostCutoff <- function(model, post.cutoff=0.5) {
30 31
 
31 32
     if (!is.numeric(post.cutoff)) {
... ...
@@ -108,10 +109,27 @@ changePostCutoff.multivariate <- function(model, post.cutoff) {
108 109
         multiHMM$bins$state <- factor(states)
109 110
         multiHMM$bins$posteriors <- post
110 111
         multiHMM$mapping <- mapping
112
+        multiHMM$peaks <- model$peaks
111 113
         model <- combineMultivariates(list(multiHMM), mode='full')
112 114
     } else {
113 115
         stop("Supply either a uniHMM, multiHMM or combinedMultiHMM object.")
114 116
     }
117
+    
118
+    ## Redo peaks
119
+    ptm <- startTimedMessage("Recalculating peaks ...")
120
+    binstates <- dec2bin(model$segments$state, colnames=colnames(post))
121
+    segments <- model$segments
122
+    mcols(segments) <- NULL
123
+    for (icol in 1:ncol(binstates)) {
124
+        segments$peakScores <- model$segments$peakScores[,icol]
125
+        segments$state <- binstates[,icol]
126
+        red.df <- suppressMessages( collapseBins(as.data.frame(segments), column2collapseBy = 'state') )
127
+        red.df <- red.df[red.df$state == 1,]
128
+        red.df$state <- NULL
129
+        model$peaks[[icol]] <- methods::as(red.df, 'GRanges')
130
+    }
131
+    stopTimedMessage(ptm)
132
+    
115 133
     ## Return model
116 134
     model$post.cutoff <- threshold
117 135
     return(model)
... ...
@@ -150,10 +168,11 @@ changePostCutoff.univariate <- function(model, post.cutoff) {
150 168
     ptm <- startTimedMessage("Making segmentation ...")
151 169
     gr <- model$bins
152 170
     df <- as.data.frame(gr)
153
-    red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2average=c('score'), columns2drop=c('width',grep('posteriors', names(df), value=TRUE), 'counts')))
154
-    red.gr <- GRanges(seqnames=red.df[,1], ranges=IRanges(start=red.df[,2], end=red.df[,3]), strand=red.df[,4], state=red.df[,'state'], score=red.df[,'mean.score'])
155
-    model$segments <- red.gr
156
-    seqlengths(model$segments) <- seqlengths(model$bins)[seqlevels(model$segments)]
171
+    red.df <- suppressMessages(collapseBins(df, column2collapseBy='state', columns2drop=c('width', 'posterior.modified', grep('posteriors', names(df), value=TRUE), 'counts')))
172
+    segments <- methods::as(red.df, 'GRanges')
173
+    model$peaks <- segments[segments$state == 'modified']
174
+    model$peaks$state <- NULL
175
+    seqlengths(model$peaks) <- seqlengths(model$bins)[seqlevels(model$peaks)]
157 176
     stopTimedMessage(ptm)
158 177
 
159 178
     ## Return model
... ...
@@ -144,12 +144,22 @@ combineMultivariates <- function(hmms, mode) {
144 144
             stopTimedMessage(ptm)
145 145
         }
146 146
         ptm <- startTimedMessage("Concatenating HMMs ...")
147
+        # Slightly more complicated selection procedure for conditions in case one mark is missing
148
+        conds.help <- lapply(infos, function(x) { unique(x$condition) })
149
+        conditions <- conds.help[[which.max(sapply(conds.help, length))]]
150
+        infos <- do.call(rbind, infos)
151
+        infos$condition <- factor(infos$condition, levels=conditions)
152
+        infos <- infos[order(infos$condition, infos$mark, infos$replicate),]
153
+        infos$condition <- as.character(infos$condition)
154
+        # Reorder everything according to conditions
147 155
         counts <- do.call(cbind, counts)
156
+        counts <- counts[,infos$ID]
148 157
         posteriors <- do.call(cbind, posteriors)
158
+        posteriors <- posteriors[,infos$ID]
149 159
         peakScores <- do.call(cbind, peakScores)
150
-        infos <- do.call(rbind, infos)
151
-        conditions <- unique(infos$condition)
160
+        peakScores <- peakScores[,infos$ID]
152 161
         binstates <- do.call(cbind, binstates)
162
+        binstates <- binstates[,infos$ID]
153 163
         states <- factor(bin2dec(binstates))
154 164
         names(states) <- NULL
155 165
         stopTimedMessage(ptm)
... ...
@@ -3,8 +3,9 @@
3 3
 #' Plotting functions for enrichment analysis of \code{\link{multiHMM}} or \code{\link{combinedMultiHMM}} objects with any annotation of interest, specified as a \code{\link[GenomicRanges]{GRanges}} object.
4 4
 #' 
5 5
 #' @name enrichment_analysis
6
-#' @param combinations A vector with combinations for which the fold enrichment will be calculated. If \code{NULL} all combinations will be considered.
6
+#' @param combinations A vector with combinations for which the enrichment will be calculated. If \code{NULL} all combinations will be considered.
7 7
 #' @param marks A vector with marks for which the enrichment is plotted. If \code{NULL} all marks will be considered.
8
+#' @param logscale Set to \code{TRUE} to plot fold enrichment on log-scale. Ignored if \code{statistic = 'fraction'}.
8 9
 #' @return A \code{\link[ggplot2:ggplot]{ggplot}} object containing the plot or a list() with \code{\link[ggplot2:ggplot]{ggplot}} objects if several plots are returned. For \code{plotFoldEnrichHeatmap} a named array with fold enrichments if \code{plot=FALSE}.
9 10
 #' @author Aaron Taudt
10 11
 #' @seealso \code{\link{plotting}}
... ...
@@ -54,6 +55,7 @@
54 55
 #'# Fold enrichment with different biotypes, showing that protein coding genes are
55 56
 #'# enriched with (bivalent) promoter combinations [H3K4me3] and [H3K4me3+H3K27me3],
56 57
 #'# while rRNA is enriched with the empty [] and repressive combinations [H3K27me3].
58
+#'    tss <- resize(genes, width = 3, fix = 'start')
57 59
 #'    biotypes <- split(tss, tss$biotype)
58 60
 #'    plotFoldEnrichHeatmap(model, annotations=biotypes) + coord_flip()
59 61
 #'
... ...
@@ -64,11 +66,12 @@ NULL
64 66
 #' @param hmm A \code{\link{combinedMultiHMM}} or \code{\link{multiHMM}} object or a file that contains such an object.
65 67
 #' @param annotations A \code{list()} with \code{\link{GRanges}} objects containing coordinates of multiple annotations The names of the list entries will be used to name the return values.
66 68
 #' @param plot A logical indicating whether the plot or an array with the fold enrichment values is returned.
69
+#' @param what One of \code{c('combinations','peaks','counts','transitions')} specifying on which feature the statistic is calculated.
67 70
 #' @importFrom S4Vectors subjectHits queryHits
68 71
 #' @importFrom IRanges subsetByOverlaps
69 72
 #' @importFrom reshape2 melt
70 73
 #' @export
71
-plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combinations=NULL, marks=NULL, plot=TRUE) {
74
+plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combinations=NULL, marks=NULL, plot=TRUE, logscale=TRUE) {
72 75
     
73 76
     hmm <- loadHmmsFromFiles(hmm, check.class=c(class.multivariate.hmm, class.combined.multivariate.hmm))[[1]]
74 77
     ## Variables
... ...
@@ -76,9 +79,17 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
76 79
     if (class(hmm) == class.combined.multivariate.hmm) {
77 80
     } else if (class(hmm) == class.multivariate.hmm) {
78 81
         # Rename 'combination' to 'combination.' for coherence with combinedMultiHMM
79
-        names(mcols(bins))[grep('combination', names(mcols(bins)))] <- 'combination.'
82
+        names(mcols(bins))[grep('combination', names(mcols(bins)))] <- paste0('combination.', unique(hmm$info$condition))
80 83
     }
81 84
     conditions <- sub('combination.', '', grep('combination', names(mcols(bins)), value=TRUE))
85
+    comb.levels <- levels(mcols(bins)[,paste0('combination.', conditions[1])])
86
+    ## Create new column combination with all conditions combined
87
+    combs <- list()
88
+    for (condition in conditions) {
89
+        combs[[condition]] <- paste0(condition, ":", mcols(bins)[,paste0('combination.', condition)])
90
+    }
91
+    combs$sep <- ', '
92
+    bins$transitions <- factor(do.call(paste, combs))
82 93
     if (is.null(combinations)) {
83 94
         comb.levels <- levels(mcols(bins)[,paste0('combination.', conditions[1])])
84 95
     } else {
... ...
@@ -93,85 +104,137 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
93 104
     annotationsAtBins <- lapply(annotations, function(x) { IRanges::subsetByOverlaps(x, bins) })
94 105
     feature.lengths <- lapply(annotationsAtBins, function(x) { sum(as.numeric(width(x))) })
95 106
     
96
-    if (what == 'peaks') {
97
-        binstates <- dec2bin(bins$state, colnames=hmm$info$ID)
98
-    }
99
-    
100
-    ## Fold enrichment
101 107
     ggplts <- list()
102 108
     folds <- list()
103 109
     maxfolds <- list()
104
-    for (condition in conditions) {
105
-        if (what == 'combinations') {
106
-            bins$combination <- mcols(bins)[,paste0('combination.', condition)]
107
-            fold <- array(NA, dim=c(length(annotationsAtBins), length(comb.levels)), dimnames=list(annotation=names(annotationsAtBins), combination=comb.levels))
108
-            for (icomb in 1:length(comb.levels)) {
109
-                mask <- bins$combination == comb.levels[icomb]
110
-                bins.mask <- bins[mask]
111
-                combstate.length <- sum(as.numeric(width(bins.mask)))
112
-                for (ifeat in 1:length(annotationsAtBins)) {
113
-                    feature <- annotationsAtBins[[ifeat]]
114
-                    ind <- findOverlaps(bins.mask, feature)
110
+    minfolds <- list()
115 111
     
116
-                    binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))]
117
-                    sum.binsinfeature <- sum(as.numeric(width(binsinfeature)))
118
-    
119
-                    featuresinbins <- feature[unique(S4Vectors::subjectHits(ind))]
120
-                    sum.featuresinbins <- sum(as.numeric(width(featuresinbins)))
121
-    
122
-                    fold[ifeat,icomb] <- sum.binsinfeature / combstate.length / feature.lengths[[ifeat]] * genome
123
-                }
124
-            }
125
-    
126
-        } else if (what == 'peaks') {
127
-            fold <- array(NA, dim=c(length(annotationsAtBins), length(mark.levels)), dimnames=list(annotation=names(annotationsAtBins), mark=mark.levels))
128
-            for (imark in 1:length(mark.levels)) {
129
-                mark <- mark.levels[imark]
130
-                colmask <- hmm$info$mark == mark
131
-                colmask <- colmask & (!duplicated(paste0(hmm$info$mark, hmm$info$condition))) # remove replicates
132
-                if (condition != "") {
133
-                    colmask <- colmask & (hmm$info$condition == condition)
134
-                }
135
-                binstates.cond <- binstates[,colmask]
136
-                mask <- binstates.cond
137
-                bins.mask <- bins[mask]
138
-                combstate.length <- sum(as.numeric(width(bins.mask)))
139
-                for (ifeat in 1:length(annotationsAtBins)) {
140
-                    feature <- annotationsAtBins[[ifeat]]
141
-                    ind <- findOverlaps(bins.mask, feature)
142
-    
143
-                    binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))]
144
-                    sum.binsinfeature <- sum(as.numeric(width(binsinfeature)))
145
-    
146
-                    featuresinbins <- feature[unique(S4Vectors::subjectHits(ind))]
147
-                    sum.featuresinbins <- sum(as.numeric(width(featuresinbins)))
148
-    
149
-                    fold[ifeat,imark] <- sum.binsinfeature / combstate.length / feature.lengths[[ifeat]] * genome
150
-                }
112
+    if (what == 'peaks') {
113
+        binstates <- dec2bin(bins$state, colnames=hmm$info$ID)
114
+    }
115
+    if (what == 'transitions') {
116
+        bins$combination <- bins$transitions
117
+        comb.levels <- names(sort(table(bins$combination), decreasing = TRUE))
118
+        fold <- array(NA, dim=c(length(annotationsAtBins), length(comb.levels)), dimnames=list(annotation=names(annotationsAtBins), combination=comb.levels))
119
+        for (icomb in 1:length(comb.levels)) {
120
+            mask <- bins$combination == comb.levels[icomb]
121
+            bins.mask <- bins[mask]
122
+            combstate.length <- sum(as.numeric(width(bins.mask)))
123
+            for (ifeat in 1:length(annotationsAtBins)) {
124
+                feature <- annotationsAtBins[[ifeat]]
125
+                ind <- findOverlaps(bins.mask, feature)
126
+
127
+                binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))]
128
+                sum.binsinfeature <- sum(as.numeric(width(binsinfeature)))
129
+
130
+                featuresinbins <- feature[unique(S4Vectors::subjectHits(ind))]
131
+                sum.featuresinbins <- sum(as.numeric(width(featuresinbins)))
132
+
133
+                fold[ifeat,icomb] <- sum.binsinfeature / combstate.length / feature.lengths[[ifeat]] * genome
151 134
             }
152
-          
153 135
         }
154
-      
155 136
         if (plot) {
156 137
             df <- reshape2::melt(fold, value.name='foldEnrichment')
157
-            maxfolds[[condition]] <- max(df$foldEnrichment, na.rm=TRUE)
158
-            if (what == 'combinations') {
159
-                ggplt <- ggplot(df) + geom_tile(aes_string(x='combination', y='annotation', fill='foldEnrichment'))
160
-            } else if (what == 'peaks') {
161
-                ggplt <- ggplot(df) + geom_tile(aes_string(x='mark', y='annotation', fill='foldEnrichment'))
138
+            if (logscale) {
139
+                df$foldEnrichment <- log(df$foldEnrichment)
162 140
             }
141
+            foldsnoinf <- setdiff(df$foldEnrichment, c(Inf, -Inf))
142
+            maxfolds[[1]] <- max(foldsnoinf, na.rm=TRUE)
143
+            minfolds[[1]] <- min(foldsnoinf, na.rm=TRUE)
144
+            ggplt <- ggplot(df) + geom_tile(aes_string(x='combination', y='annotation', fill='foldEnrichment'))
163 145
             ggplt <- ggplt + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))
164
-            ggplts[[condition]] <- ggplt
146
+            ggplts[[1]] <- ggplt
165 147
         } else {
166
-            folds[[condition]] <- fold
148
+            folds[[1]] <- fold
149
+        }
150
+    }
151
+    
152
+    ## Fold enrichment
153
+    if (what %in% c('peaks', 'combinations')) {
154
+        for (condition in conditions) {
155
+            if (what == 'combinations') {
156
+                bins$combination <- mcols(bins)[,paste0('combination.', condition)]
157
+                fold <- array(NA, dim=c(length(annotationsAtBins), length(comb.levels)), dimnames=list(annotation=names(annotationsAtBins), combination=comb.levels))
158
+                for (icomb in 1:length(comb.levels)) {
159
+                    mask <- bins$combination == comb.levels[icomb]
160
+                    bins.mask <- bins[mask]
161
+                    combstate.length <- sum(as.numeric(width(bins.mask)))
162
+                    for (ifeat in 1:length(annotationsAtBins)) {
163
+                        feature <- annotationsAtBins[[ifeat]]
164
+                        ind <- findOverlaps(bins.mask, feature)
165
+        
166
+                        binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))]
167
+                        sum.binsinfeature <- sum(as.numeric(width(binsinfeature)))
168
+        
169
+                        featuresinbins <- feature[unique(S4Vectors::subjectHits(ind))]
170
+                        sum.featuresinbins <- sum(as.numeric(width(featuresinbins)))
171
+        
172
+                        fold[ifeat,icomb] <- sum.binsinfeature / combstate.length / feature.lengths[[ifeat]] * genome
173
+                    }
174
+                }
175
+        
176
+            } else if (what == 'peaks') {
177
+                fold <- array(NA, dim=c(length(annotationsAtBins), length(mark.levels)), dimnames=list(annotation=names(annotationsAtBins), mark=mark.levels))
178
+                for (imark in 1:length(mark.levels)) {
179
+                    mark <- mark.levels[imark]
180
+                    colmask <- hmm$info$mark == mark
181
+                    colmask <- colmask & (!duplicated(paste0(hmm$info$mark, hmm$info$condition))) # remove replicates
182
+                    if (condition != "") {
183
+                        colmask <- colmask & (hmm$info$condition == condition)
184
+                    }
185
+                    binstates.cond <- binstates[,colmask]
186
+                    mask <- binstates.cond
187
+                    bins.mask <- bins[mask]
188
+                    combstate.length <- sum(as.numeric(width(bins.mask)))
189
+                    for (ifeat in 1:length(annotationsAtBins)) {
190
+                        feature <- annotationsAtBins[[ifeat]]
191
+                        ind <- findOverlaps(bins.mask, feature)
192
+        
193
+                        binsinfeature <- bins.mask[unique(S4Vectors::queryHits(ind))]
194
+                        sum.binsinfeature <- sum(as.numeric(width(binsinfeature)))
195
+        
196
+                        featuresinbins <- feature[unique(S4Vectors::subjectHits(ind))]
197
+                        sum.featuresinbins <- sum(as.numeric(width(featuresinbins)))
198
+        
199
+                        fold[ifeat,imark] <- sum.binsinfeature / combstate.length / feature.lengths[[ifeat]] * genome
200
+                    }
201
+                }
202
+              
203
+            }
204
+          
205
+            if (plot) {
206
+                df <- reshape2::melt(fold, value.name='foldEnrichment')
207
+                if (logscale) {
208
+                    df$foldEnrichment <- log(df$foldEnrichment)
209
+                }
210
+                foldsnoinf <- setdiff(df$foldEnrichment, c(Inf, -Inf))
211
+                maxfolds[[condition]] <- max(foldsnoinf, na.rm=TRUE)
212
+                minfolds[[condition]] <- min(foldsnoinf, na.rm=TRUE)
213
+                if (what == 'combinations') {
214
+                    ggplt <- ggplot(df) + geom_tile(aes_string(x='combination', y='annotation', fill='foldEnrichment'))
215
+                } else if (what == 'peaks') {
216
+                    ggplt <- ggplot(df) + geom_tile(aes_string(x='mark', y='annotation', fill='foldEnrichment'))
217
+                }
218
+                ggplt <- ggplt + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))
219
+                ggplts[[condition]] <- ggplt
220
+            } else {
221
+                folds[[condition]] <- fold
222
+            }
167 223
         }
168 224
     }
169 225
     if (plot) {
170
-        maxfolds <- unlist(maxfolds)
171
-        ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_fill_gradientn(colors=grDevices::colorRampPalette(c("blue","white","red"))(20), values=c(seq(0,1,length.out=10), seq(1,max(maxfolds, na.rm=TRUE),length.out=10)), rescaler=function(x,...) {x}, oob=identity, limits=c(0,max(maxfolds, na.rm=TRUE))) })
226
+        maxfold <- max(unlist(maxfolds), na.rm=TRUE)
227
+        minfold <- min(unlist(minfolds), na.rm=TRUE)
228
+        limits <- max(abs(maxfold), abs(minfold))
229
+        if (logscale) {
230
+            ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_fill_gradientn(name='log(observed/expected)', colors=grDevices::colorRampPalette(c("blue","white","red"))(20), values=c(seq(-limits,0,length.out=10), seq(0,limits,length.out=10)), rescaler=function(x,...) {x}, oob=identity, limits=c(-limits, limits)) })
231
+            ggplts <- lapply(ggplts, function(ggplt) { ggplt$data$foldEnrichment[ggplt$data$foldEnrichment == -Inf] <- -limits; ggplt })
232
+        } else {
233
+            ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_fill_gradientn(name='observed/expected', colors=grDevices::colorRampPalette(c("blue","white","red"))(20), values=c(seq(0,1,length.out=10), seq(1,maxfold,length.out=10)), rescaler=function(x,...) {x}, oob=identity, limits=c(0,maxfold)) })
234
+        }
172 235
     }
173 236
 
174
-    if (class(hmm) == class.multivariate.hmm) {
237
+    if (class(hmm) == class.multivariate.hmm | what == 'transitions') {
175 238
         if (plot) {
176 239
             return(ggplts[[1]])
177 240
         } else {
... ...
@@ -203,12 +266,12 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
203 266
         conditions <- sub('combination.', '', grep('combination', names(mcols(bins)), value=TRUE))
204 267
         comb.levels <- levels(mcols(bins)[,paste0('combination.', conditions[1])])
205 268
         ## Create new column combination with all conditions combined
206
-        combinations <- list()
269
+        combs <- list()
207 270
         for (condition in conditions) {
208
-            combinations[[condition]] <- paste0(condition, ":", mcols(bins)[,paste0('combination.', condition)])
271
+            combs[[condition]] <- paste0(condition, ":", mcols(bins)[,paste0('combination.', condition)])
209 272
         }
210
-        combinations$sep <- ', '
211
-        bins$combination <- factor(do.call(paste, combinations))
273
+        combs$sep <- ', '
274
+        bins$combination <- factor(do.call(paste, combs))
212 275
     } else if (class(hmm) == class.multivariate.hmm) {
213 276
         comb.levels <- levels(bins$combination)
214 277
     }
... ...
@@ -254,18 +317,19 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
254 317
     rownames(ext.index) <- 1:nrow(ext.index)
255 318
     stopTimedMessage(ptm)
256 319
     
257
-    ## Go through combinations and then tracks to get the read counts
320
+    ## Go through combinations and then IDs to get the read counts
258 321
     ptm <- startTimedMessage("Getting read counts")
259 322
     counts <- list()
260
-    for (combination in levels(bins$combination)) {
323
+    combinations <- names(sort(table(bins$combination[index]), decreasing = TRUE))
324
+    for (combination in combinations) {
261 325
         counts[[combination]] <- list()
262 326
         index.combination <- which(bins$combination[index]==combination)
263 327
         ext.index.combination <- ext.index[index.combination,]
264 328
         if (is.null(dim(ext.index.combination))) {
265 329
             ext.index.combination <- array(ext.index.combination, dim=c(1,dim(ext.index)[[2]]), dimnames=list(anno=rownames(ext.index)[index.combination], position=dimnames(ext.index)[[2]]))
266 330
         }
267
-        for (ntrack in colnames(bins$counts)) {
268
-            counts[[combination]][[ntrack]] <- array(bins$counts[ext.index.combination,ntrack], dim=dim(ext.index.combination), dimnames=dimnames(ext.index.combination))
331
+        for (nID in colnames(bins$counts)) {
332
+            counts[[combination]][[nID]] <- array(bins$counts[ext.index.combination,nID], dim=dim(ext.index.combination), dimnames=dimnames(ext.index.combination))
269 333
         }
270 334
     }
271 335
     stopTimedMessage(ptm)
... ...
@@ -287,16 +351,16 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
287 351
     comb2keep <- names(num.comb)[num.comb/sum(num.comb) > 0.005]
288 352
     counts <- counts[comb2keep]
289 353
     df <- reshape2::melt(counts)
290
-    names(df) <- c('id','position','RPKM','track','combination')
354
+    names(df) <- c('id','position','RPKM','ID','combination')
291 355
     df$id <- factor(df$id, levels=rev(unique(df$id)))
292 356
     df$combination <- factor(df$combination, levels=unique(df$combination))
293
-    df$track <- factor(df$track, levels=colnames(bins$counts))
357
+    df$ID <- factor(df$ID, levels=hmm$info$ID)
294 358
     
295 359
     ## Plot as heatmap
296 360
     ggplt <- ggplot(df) + geom_tile(aes_string(x='position', y='id', color='combination'))
297 361
     ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$combination))))
298 362
     ggplt <- ggplt + geom_tile(aes_string(x='position', y='id', fill='RPKM'), alpha=0.6)
299
-    ggplt <- ggplt + facet_wrap( ~ track, nrow=1) + custom_theme
363
+    ggplt <- ggplt + facet_wrap( ~ ID, nrow=1) + custom_theme
300 364
     ggplt <- ggplt + xlab('distance from annotation in [bp]') + ylab('')
301 365
     ggplt <- ggplt + scale_fill_continuous(trans='log1p', low='white', high='black')
302 366
     # Insert horizontal lines
... ...
@@ -315,7 +379,7 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
315 379
 #' @inheritParams enrichmentAtAnnotation
316 380
 #' @importFrom reshape2 melt
317 381
 #' @export
318
-plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c("start","inside","end"), num.intervals=20, what='combinations', combinations=NULL, marks=NULL, statistic='fold') {
382
+plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c("start","inside","end"), num.intervals=20, what='combinations', combinations=NULL, marks=NULL, statistic='fold', logscale=TRUE) {
319 383
 
320 384
     ## Check user input
321 385
     if ((!what %in% c('combinations','peaks','counts')) | length(what) > 1) {
... ...
@@ -326,11 +390,17 @@ plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c
326 390
     }
327 391
   
328 392
     ## Variables
329
-    hmm <- loadHmmsFromFiles(hmm, check.class=c(class.multivariate.hmm, class.combined.multivariate.hmm))[[1]]
330
-    hmm$bins$counts <- rpkm.matrix(hmm$bins$counts, binsize=mean(width(hmm$bins)))
393
+    hmm <- loadHmmsFromFiles(hmm, check.class=c(class.univariate.hmm, class.multivariate.hmm, class.combined.multivariate.hmm))[[1]]
331 394
     bins <- hmm$bins
332
-    if (class(hmm) == class.combined.multivariate.hmm) {
395
+    if (class(hmm) == class.univariate.hmm) {
396
+        bins$counts <- rpkm.vector(hmm$bins$counts, binsize=mean(width(hmm$bins)))
397
+        mcols(bins)['combination.'] <- bins$state
398
+        bins$state <- c('zero-inflation' = 0, 'unmodified' = 0, 'modified' = 1)[bins$state]
399
+        hmm$info <- data.frame(file=NA, mark=1, condition=1, replicate=1, pairedEndReads=NA, controlFiles=NA, ID='1-1-rep1')
400
+    } else if (class(hmm) == class.combined.multivariate.hmm) {
401
+        bins$counts <- rpkm.matrix(hmm$bins$counts, binsize=mean(width(hmm$bins)))
333 402
     } else if (class(hmm) == class.multivariate.hmm) {
403
+        bins$counts <- rpkm.matrix(hmm$bins$counts, binsize=mean(width(hmm$bins)))
334 404
         # Rename 'combination' to 'combination.' for coherence with combinedMultiHMM
335 405
         names(mcols(bins))[grep('combination', names(mcols(bins)))] <- 'combination.'
336 406
     }
... ...
@@ -348,10 +418,12 @@ plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c
348 418
 
349 419
     if (what %in% c('peaks','counts')) {
350 420
         ### Get fold enrichment
351
-        enrich <- enrichmentAtAnnotation(hmm$bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals, statistic=statistic)
421
+        enrich <- enrichmentAtAnnotation(bins, hmm$info, annotation, bp.around.annotation=bp.around.annotation, region=region, what=what, num.intervals=num.intervals, statistic=statistic)
352 422
     }
353 423
     ggplts <- list()
354 424
     maxfolds <- list()
425
+    minfolds <- list()
426
+    maxcols <- list()
355 427
     for (condition in conditions) {
356 428
         if (what == 'combinations') {
357 429
             ### Get fold enrichment
... ...
@@ -382,37 +454,59 @@ plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c
382 454
         df$position[df$L1 == 'inside'] <- df$position[df$L1 == 'inside'] * bp.around.annotation
383 455
         if (what == 'combinations') {
384 456
             df <- df[df$combination %in% comb.levels,]
457
+            df$combination <- factor(df$combination, levels=comb.levels)
385 458
         } else if (what %in% c('peaks','counts')) {
386
-            df$mark <- sub("-.*", "", df$track)
459
+            df$mark <- sub("-.*", "", df$ID)
387 460
             df <- df[df$mark %in% mark.levels, ]
388
-            df$condition <- sapply(strsplit(as.character(df$track), '-'), '[[', 2)
461
+            df$condition <- sapply(strsplit(as.character(df$ID), '-'), '[[', 2)
389 462
             if (condition != "") {
390 463
                 df <- df[df$condition == condition, ]
391 464
             }
392 465
         }
466
+        if (logscale & !(statistic=='fraction')) {
467
+            df$value <- log(df$value)
468
+        }
393 469
 
394 470
         ### Plot
395 471
         if (what == 'combinations') {
396 472
             ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='combination'), size=2)
397 473
             if (statistic == 'fold') {
398
-                ggplt <- ggplt + ylab('fold enrichment')
399
-                ggplt <- ggplt + geom_hline(yintercept=1, lty=2)
474
+                if (logscale) {
475
+                    ggplt <- ggplt + ylab('log(observed/expected)')
476
+                    ggplt <- ggplt + geom_hline(yintercept=0, lty=2)
477
+                } else {
478
+                    ggplt <- ggplt + ylab('observed/expected')
479
+                    ggplt <- ggplt + geom_hline(yintercept=1, lty=2)
480
+                }
400 481
             } else if (statistic == 'fraction') {
401 482
                 ggplt <- ggplt + ylab('fraction')
402 483
             }
403
-            ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$combination))))
484
+            # ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$combination))))
485
+            maxcols[[condition]] <- length(unique(df$combination))
404 486
         } else if (what == 'peaks') {
405 487
             ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='mark'), size=2)
406 488
             if (statistic == 'fold') {
407
-                ggplt <- ggplt + ylab('fold enrichment')
408
-                ggplt <- ggplt + geom_hline(yintercept=1, lty=2)
489
+                if (logscale) {
490
+                    ggplt <- ggplt + ylab('log(observed/expected)')
491
+                    ggplt <- ggplt + geom_hline(yintercept=0, lty=2)
492
+                } else {
493
+                    ggplt <- ggplt + ylab('observed/expected')
494
+                    ggplt <- ggplt + geom_hline(yintercept=1, lty=2)
495
+                }
409 496
             } else if (statistic == 'fraction') {
410 497
                 ggplt <- ggplt + ylab('fraction')
411 498
             }
412
-            ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$mark))))
499
+            # ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$mark))))
500
+            maxcols[[condition]] <- length(unique(df$mark))
413 501
         } else if (what == 'counts') {
414
-            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='track'), size=2) + ylab('RPKM')
415
-            ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$track))))
502
+            ggplt <- ggplot(df) + geom_line(aes_string(x='position', y='value', col='ID'), size=2)
503
+            if (logscale) {
504
+                ggplt <- ggplt + ylab('log(RPKM)')
505
+            } else {
506
+                ggplt <- ggplt + ylab('RPKM')
507
+            }
508
+            # ggplt <- ggplt + scale_color_manual(values = getDistinctColors(length(unique(df$ID))))
509
+            maxcols[[condition]] <- length(unique(df$ID))
416 510
         }
417 511
         ggplt <- ggplt + theme_bw() + xlab('distance from annotation in [bp]')
418 512
         if (length(region)>=2 & 'inside' %in% region) {
... ...
@@ -420,16 +514,23 @@ plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c
420 514
             labels <- c(-bp.around.annotation, -bp.around.annotation/2, '0%', '50%', '100%', bp.around.annotation/2, bp.around.annotation)
421 515
             ggplt <- ggplt + scale_x_continuous(breaks=breaks, labels=labels)
422 516
         }
423
-        maxfolds[[condition]] <- max(df$value, na.rm=TRUE)
517
+        foldsnoinf <- setdiff(df$value, c(Inf, -Inf))
518
+        maxfolds[[condition]] <- max(foldsnoinf, na.rm=TRUE)
519
+        minfolds[[condition]] <- min(foldsnoinf, na.rm=TRUE)
424 520
         ggplts[[condition]] <- ggplt
425 521
     }
426
-    maxfolds <- unlist(maxfolds)
522
+    maxfold <- max(unlist(maxfolds), na.rm=TRUE)
523
+    minfold <- min(unlist(minfolds), na.rm=TRUE)
524
+    maxcol <- max(unlist(maxcols), na.rm=TRUE)
427 525
     if (statistic == 'fraction' & what %in% c('combinations','peaks')) {
428 526
         ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_y_continuous(limits=c(0,1)) })
429 527
     } else {
430
-        ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_y_continuous(limits=c(0,max(maxfolds, na.rm=TRUE)*1.1)) })
528
+        ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_y_continuous(limits=c(minfold*(1-sign(minfold)*0.1),maxfold*(1+sign(maxfold)*0.1))) })
431 529
     }
432
-    if (class(hmm) == class.multivariate.hmm) {
530
+    ggplts <- lapply(ggplts, function(ggplt) { ggplt + scale_color_manual(values=getDistinctColors(maxcol)) }) # Add color here like this because of weird bug
531
+    if (class(hmm) == class.univariate.hmm) {
532
+        return(ggplts[[1]])
533
+    } else if (class(hmm) == class.multivariate.hmm) {
433 534
         return(ggplts[[1]])
434 535
     } else if (class(hmm) == class.combined.multivariate.hmm) {
435 536
         return(ggplts)
... ...
@@ -448,12 +549,12 @@ plotEnrichment <- function(hmm, annotation, bp.around.annotation=10000, region=c
448 549
 #' @param annotation A \code{\link{GRanges}} object with the annotation of interest.
449 550
 #' @param bp.around.annotation An integer specifying the number of basepairs up- and downstream of the annotation for which the enrichment will be calculated.
450 551
 #' @param region A combination of \code{c('start','inside','end')} specifying the region of the annotation for which the enrichment will be calculated. Select \code{'start'} if you have a point-sized annotation like transcription start sites. Select \code{c('start','inside','end')} if you have long annotations like genes.
451
-#' @param what One of \code{c('combinations','peaks','counts')} specifying which statistic to calculate.
552
+#' @param what One of \code{c('combinations','peaks','counts')} specifying on which feature the statistic is calculated.
452 553
 #' @param num.intervals Number of intervals for enrichment 'inside' of annotation.
453 554
 #' @param statistic The statistic to calculate. Either 'fold' for fold enrichments or 'fraction' for fraction of bins falling into the annotation.
454 555
 #' @return A \code{list()} containing \code{data.frame()}s for enrichment of combinatorial states and binary states at the start, end and inside of the annotation.
455 556
 #' @importFrom S4Vectors as.factor subjectHits queryHits
456
-enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=10000, region=c('start','inside','end'), what=c('combinations','peaks','counts'), num.intervals=21, statistic='fold') {
557
+enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=10000, region=c('start','inside','end'), what='combinations', num.intervals=21, statistic='fold') {
457 558
 
458 559
     ## Check user input
459 560
     if ((!what %in% c('combinations','peaks','counts')) | length(what) > 1) {
... ...
@@ -475,7 +576,11 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
475 576
     if ('peaks' %in% what) {
476 577
         binstates <- dec2bin(bins$state, colnames=info$ID)
477 578
         # Remove replicates
478
-        binstates <- binstates[ ,info.dedup$ID]
579
+        if (ncol(binstates) > 1) {
580
+            binstates <- binstates[ ,info.dedup$ID]
581
+        } else {
582
+            binstates <- matrix(binstates[ ,info.dedup$ID], ncol=1)
583
+        }
479 584
         colsums.binstates <- colSums(binstates)
480 585
     }
481 586
     if ('counts' %in% what) {
... ...
@@ -490,9 +595,9 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
490 595
         widths.annotation <- width(annotation) - 1
491 596
         annotation.1bp <- resize(annotation, 1, fix='start')
492 597
         # Initialize arrays
493
-        if ('peaks' %in% what) binstates.inside <- array(dim=c(num.intervals+1, length(info.dedup$ID)), dimnames=list(interval=intervals, track=info.dedup$ID))
598
+        if ('peaks' %in% what) binstates.inside <- array(dim=c(num.intervals+1, length(info.dedup$ID)), dimnames=list(interval=intervals, ID=info.dedup$ID))
494 599
         if ('combinations' %in% what) combinations.inside <- array(dim=c(num.intervals+1, length(levels(bins$combination))), dimnames=list(interval=intervals, combination=levels(bins$combination)))
495
-        if ('counts' %in% what) counts.inside <- array(dim=c(num.intervals+1, length(info$ID)), dimnames=list(interval=intervals, track=info$ID))
600
+        if ('counts' %in% what) counts.inside <- array(dim=c(num.intervals+1, length(info$ID)), dimnames=list(interval=intervals, ID=info$ID))
496 601
 
497 602
         for (interval in intervals) {
498 603
             shift <- widths.annotation * interval * c(1,-1,1)[as.integer(strand(annotation))]
... ...
@@ -506,10 +611,15 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
506 611
             index <- c(index.inside.plus, index.inside.minus)
507 612
             index <- index[index>0 & index<=length(bins)] # index could cross chromosome boundaries, but we risk it
508 613
             if ('peaks' %in% what) {
614
+                if (ncol(binstates) > 1) {
615
+                    binstates.index <- binstates[index,]
616
+                } else {
617
+                    binstates.index <- matrix(binstates[index,], ncol=1)
618
+                }
509 619
                 if (statistic == 'fraction') {
510
-                    binstates.inside[as.character(interval),] <- colSums(binstates[index,]) / length(index) # or colMeans
620
+                    binstates.inside[as.character(interval),] <- colSums(binstates.index) / length(index) # or colMeans
511 621
                 } else if (statistic == 'fold') {
512
-                    binstates.inside[as.character(interval),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins)
622
+                    binstates.inside[as.character(interval),] <- colSums(binstates.index) / length(index) / colsums.binstates * length(bins)
513 623
                 }
514 624
             }
515 625
             if ('combinations' %in% what) {
... ...
@@ -546,17 +656,22 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
546 656
         index.start.plus <- index.start.plus[!is.na(index.start.plus)]
547 657
         index.start.minus <- index.start.minus[!is.na(index.start.minus)]
548 658
         # Occurrences at every bin position relative to feature
549
-        if ('peaks' %in% what) binstates.start <- array(dim=c(length(-lag:lag), length(info.dedup$ID)), dimnames=list(lag=-lag:lag, track=info.dedup$ID))
659
+        if ('peaks' %in% what) binstates.start <- array(dim=c(length(-lag:lag), length(info.dedup$ID)), dimnames=list(lag=-lag:lag, ID=info.dedup$ID))
550 660
         if ('combinations' %in% what) combinations.start <- array(dim=c(length(-lag:lag), length(levels(bins$combination))), dimnames=list(lag=-lag:lag, combination=levels(bins$combination)))
551
-        if ('counts' %in% what) counts.start <- array(dim=c(length(-lag:lag), length(info$ID)), dimnames=list(lag=-lag:lag, track=info$ID))
661
+        if ('counts' %in% what) counts.start <- array(dim=c(length(-lag:lag), length(info$ID)), dimnames=list(lag=-lag:lag, ID=info$ID))
552 662
         for (ilag in -lag:lag) {
553 663
             index <- c(index.start.plus+ilag, index.start.minus-ilag)
554 664
             index <- index[index>0 & index<=length(bins)]
555 665
             if ('peaks' %in% what) {
666
+                if (ncol(binstates) > 1) {
667
+                    binstates.index <- binstates[index,]
668
+                } else {
669
+                    binstates.index <- matrix(binstates[index,], ncol=1)
670
+                }
556 671
                 if (statistic == 'fraction') {
557
-                    binstates.start[as.character(ilag),] <- colSums(binstates[index,]) / length(index)
672
+                    binstates.start[as.character(ilag),] <- colSums(binstates.index) / length(index)
558 673
                 } else if (statistic == 'fold') {
559
-                    binstates.start[as.character(ilag),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins)
674
+                    binstates.start[as.character(ilag),] <- colSums(binstates.index) / length(index) / colsums.binstates * length(bins)
560 675
                 }
561 676
             }
562 677
             if ('combinations' %in% what) {
... ...
@@ -596,17 +711,22 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
596 711
         index.end.plus <- index.end.plus[!is.na(index.end.plus)]
597 712
         index.end.minus <- index.end.minus[!is.na(index.end.minus)]
598 713
         # Occurrences at every bin position relative to feature
599
-        if ('peaks' %in% what) binstates.end <- array(dim=c(length(-lag:lag), length(info.dedup$ID)), dimnames=list(lag=-lag:lag, track=info.dedup$ID))
714
+        if ('peaks' %in% what) binstates.end <- array(dim=c(length(-lag:lag), length(info.dedup$ID)), dimnames=list(lag=-lag:lag, ID=info.dedup$ID))
600 715
         if ('combinations' %in% what) combinations.end <- array(dim=c(length(-lag:lag), length(levels(bins$combination))), dimnames=list(lag=-lag:lag, combination=levels(bins$combination)))
601
-        if ('counts' %in% what) counts.end <- array(dim=c(length(-lag:lag), length(info$ID)), dimnames=list(lag=-lag:lag, track=info$ID))
716
+        if ('counts' %in% what) counts.end <- array(dim=c(length(-lag:lag), length(info$ID)), dimnames=list(lag=-lag:lag, ID=info$ID))
602 717
         for (ilag in -lag:lag) {
603 718
             index <- c(index.end.plus+ilag, index.end.minus-ilag)
604 719
             index <- index[index>0 & index<=length(bins)]
605 720
             if ('peaks' %in% what) {
721
+                if (ncol(binstates) > 1) {
722
+                    binstates.index <- binstates[index,]
723
+                } else {
724
+                    binstates.index <- matrix(binstates[index,], ncol=1)
725
+                }
606 726
                 if (statistic == 'fraction') {
607
-                    binstates.end[as.character(ilag),] <- colSums(binstates[index,]) / length(index)
727
+                    binstates.end[as.character(ilag),] <- colSums(binstates.index) / length(index)
608 728
                 } else if (statistic == 'fold') {
609
-                    binstates.end[as.character(ilag),] <- colSums(binstates[index,]) / length(index) / colsums.binstates * length(bins)
729
+                    binstates.end[as.character(ilag),] <- colSums(binstates.index) / length(index) / colsums.binstates * length(bins)
610 730
                 }
611 731
             }
612 732
             if ('combinations' %in% what) {
... ...
@@ -874,9 +874,9 @@ exportGRangesAsBedFile <- function(gr, trackname, filename, namecol='combination
874 874
 
875 875
     # Write first line to file
876 876
     if (append) {
877
-        message('appending to file ',filename)
877
+        ptm <- startTimedMessage('Appending to file ',filename, ' ...')
878 878
     } else {
879
-        message('Writing to file ',filename)
879
+        ptm <- startTimedMessage('Writing to file ',filename, ' ...')
880 880
         cat("", file=filename.gz)
881 881
     }
882 882
     
... ...
@@ -888,6 +888,10 @@ exportGRangesAsBedFile <- function(gr, trackname, filename, namecol='combination
888 888
         gr$score <- 0
889 889
     } else {
890 890
         gr$score <- mcols(gr)[,scorecol]
891
+				# Check if scorecolumn follows the UCSC convention
892
+				if (min(gr$score) < 0 | max(gr$score) > 1000 | all(gr$score<=1.2) | !is.integer(gr$score)) {
893
+						warning("Column '", scorecol, "' should contain integer values between 0 and 1000 for compatibility with the UCSC convention.")
894
+				}
891 895
     }
892 896
     regions <- as.data.frame(gr)[c('chromosome','start','end','score','strand')]
893 897
     regions$strand <- sub("\\*", ".", regions$strand)
... ...
@@ -897,13 +901,15 @@ exportGRangesAsBedFile <- function(gr, trackname, filename, namecol='combination
897 901
         regions$name <- mcols(gr)[,namecol]
898 902
     }
899 903
     regions <- regions[c('chromosome','start','end','name','score','strand')]
900
-    # Make score integer
901
-    if (min(regions$score) < 0 | max(regions$score) > 1000 | all(regions$score<=1.2) | !is.integer(regions$score)) {
902
-        warning("Column '", scorecol, "' should contain integer values between 0 and 1000 for compatibility with the UCSC convention.")
903
-    }
904
-    df <- cbind(regions, thickStart=regions$start, thickEnd=regions$end)
904
+    df <- regions
905
+    # Convert from 1-based closed to 0-based half open
906
+    df$start <- df$start - 1
907
+    
905 908
     
906 909
     if (!is.null(colorcol)) {
910
+        df <- cbind(regions, thickStart=regions$start, thickEnd=regions$end)
911
+        # Convert from 1-based closed to 0-based half open
912
+        df$thickStart <- df$thickStart - 1
907 913
         # Generate the colors for each element in 'namecol'
908 914
         if (colorcol %in% names(mcols(gr))) {
909 915
             if (is.null(colors)) {
... ...
@@ -921,9 +927,6 @@ exportGRangesAsBedFile <- function(gr, trackname, filename, namecol='combination
921 927
         }
922 928
     }
923 929
     
924
-    # Convert from 1-based closed to 0-based half open
925
-    df$start <- df$start - 1
926
-    df$thickStart <- df$thickStart - 1
927 930
     if (nrow(df) == 0) {
928 931
         warning('No regions in input')
929 932
     } else {
... ...
@@ -931,5 +934,6 @@ exportGRangesAsBedFile <- function(gr, trackname, filename, namecol='combination
931 934
     }
932 935
 
933 936
     close(filename.gz)
937
+    stopTimedMessage(ptm)
934 938
 
935 939
 }
... ...
@@ -19,11 +19,15 @@ genomicFrequencies <- function(multi.hmm, combinations=NULL, per.mark=FALSE) {
19 19
 
20 20
     multi.hmm <- loadHmmsFromFiles(multi.hmm, check.class=c(class.multivariate.hmm, class.combined.multivariate.hmm))[[1]]
21 21
     bins <- multi.hmm$bins
22
+    segs <- multi.hmm$segments
23
+    peaks <- multi.hmm$peaks
22 24
     
23 25
     if (per.mark) {
24
-        binstates <- dec2bin(bins$state, colnames=multi.hmm$info$ID)
25
-        t <- colSums(binstates) / nrow(binstates)
26
-        return(t)
26
+        # binstates <- dec2bin(bins$state, colnames=multi.hmm$info$ID)
27
+        # t <- colSums(binstates) / nrow(binstates)
28
+        t <- sapply(peaks, function(peak) { sum(as.numeric(width(peak))) }) / sum(as.numeric(width(bins)))
29
+        s <- sapply(peaks, length)
30
+        return(list(frequency=t, domains=s))
27 31
     }
28 32
       
29 33
     if (class(multi.hmm)==class.multivariate.hmm) {
... ...
@@ -35,18 +39,22 @@ genomicFrequencies <- function(multi.hmm, combinations=NULL, per.mark=FALSE) {
35 39
         }
36 40
         t <- table(bins$combination) / length(bins)
37 41
         t <- t[names(t) %in% comb.levels]
38
-        return(t)
42
+        s <- table(segs$combination)
43
+        s <- s[names(s) %in% comb.levels]
44
+        return(list(frequency=t, domains=s))
39 45
       
40 46
     } else if (class(multi.hmm)==class.combined.multivariate.hmm) {
41 47
       
42 48
         if (is.null(combinations)) {
43
-            comb.levels <- unique(as.vector(sapply(mcols(bins)[grepl('combination', names(mcols(bins)))], levels)))
49
+            comb.levels <- unique(as.vector(sapply(getCombinations(bins), levels)))
44 50
         } else {
45 51
             comb.levels <- combinations
46 52
         }
47 53
         t <- sapply(mcols(bins)[grepl('combination', names(mcols(bins)))], function(x) { table(x) / length(bins) })
48 54
         t <- t[rownames(t) %in% comb.levels,]
49
-        return(t)
55
+        s <- sapply(mcols(segs)[grepl('combination', names(mcols(segs)))], table)
56
+        s <- s[rownames(s) %in% comb.levels,]
57
+        return(list(frequency=t, domains=s))
50 58
       
51 59
     }
52 60
 }
... ...
@@ -140,6 +148,13 @@ transitionFrequencies <- function(multi.hmms=NULL, combined.hmm=NULL, zero.state
140 148
     # Cumulative frequencies
141 149
     freqtrans$cumulative.frequency <- cumsum(freqtrans$frequency)
142 150
     stopTimedMessage(ptm)
151
+    
152
+    ### Number of domains ###
153
+    ptm <- startTimedMessage("Number of domains ...")
154
+    rle.gentrans <- rle(as.character(gentrans))
155
+    ndomains <- table(rle.gentrans$values)
156
+    freqtrans$domains <- ndomains[freqtrans$transition]
157
+    stopTimedMessage(ptm)
143 158
 
144 159
     ### Assigning groups for frequency table ###
145 160
     ptm <- startTimedMessage("Assigning groups ...")
... ...
@@ -157,6 +172,9 @@ transitionFrequencies <- function(multi.hmms=NULL, combined.hmm=NULL, zero.state
157 172
     ## Remove unneeded column
158 173
     freqtrans$transition <- NULL
159 174
     
175
+    ## Reorder columns
176
+    freqtrans <- freqtrans[, c(grep('combination', names(freqtrans), value=TRUE), 'domains', 'frequency', 'cumulative.frequency', 'group')]
177
+    
160 178
     ## Return value ##
161 179
     return(list(table=freqtrans, per.bin=gentrans))
162 180
 
... ...
@@ -6,24 +6,40 @@
6 6
 #' 
7 7
 #' The function computes the euclidian distance between all \code{\link{colors}} and iteratively selects those that have the furthest closes distance to the set of already selected colors.
8 8
 #' 
9
-#' @param n Number of colors to select.
9
+#' @param n Number of colors to select. If \code{n} is a character vector, \code{length(n)} will be taken as the number of colors and the colors will be named by \code{n}.
10 10
 #' @param start.color Color to start the selection process from.
11 11
 #' @param exclude.colors Character vector with colors that should not be used.
12 12
 #' @param exclude.rgb.above Exclude colors where all RGB values are above. This is useful to exclude whitish colors.
13
+#' @param exclude.brightness.above Exclude colors where the 'brightness' value in HSV space is above. This is useful to obtain a matt palette.
13 14
 #' @return A character vector with colors.
14 15
 #' @author Aaron Taudt
15
-#' @importFrom grDevices col2rgb
16
-#' @importFrom stats dist
17 16
 #' @export
17
+#' @importFrom grDevices col2rgb rgb2hsv
18
+#' @importFrom stats dist
18 19
 #' @examples
19 20
 #'cols <- getDistinctColors(5)
20 21
 #'pie(rep(1,5), labels=cols, col=cols)
21 22
 #'
22
-getDistinctColors <- function(n, start.color='blue4', exclude.colors=c('white','black','gray','grey'), exclude.rgb.above=210) {
23
+getDistinctColors <- function(n, start.color='blue4', exclude.colors=c('white','black','gray','grey','\\<yellow\\>', 'yellow1', 'lemonchiffon'), exclude.brightness.above=1, exclude.rgb.above=210) {
23 24
     
25
+    n.names <- NULL
26
+    if (is.character(n)) {
27
+        n.names <- n
28
+        n <- length(n)
29
+    } else if (is.factor(n)) {
30
+        n.names <- as.character(n)
31
+        n <- length(n)
32
+    }
24 33
     cols <- grDevices::colors()
34
+    
25 35
     # Exclude unwanted colors
26 36
     cols <- grep(paste(exclude.colors, collapse='|'), cols, invert=TRUE, value=TRUE)
37
+    # Exclude too bright colors
38
+    colsrgb <- grDevices::col2rgb(cols)
39
+    colshsv <- t(mapply(grDevices::rgb2hsv, r=colsrgb[1,], g=colsrgb[2,], b=colsrgb[3,]))
40
+    rownames(colshsv) <- cols
41
+    colshsv <- colshsv[colshsv[,3] <= exclude.brightness.above,]
42
+    cols <- rownames(colshsv)
27 43
     # Get RGB values
28 44
     rgbs <- t(grDevices::col2rgb(cols))
29 45
     rownames(rgbs) <- cols
... ...
@@ -52,6 +68,7 @@ getDistinctColors <- function(n, start.color='blue4', exclude.colors=c('white','
52 68
     if (length(colors) > length(selected.cols)) {
53 69
         warning("Recycling colors because we only have ", length(selected.cols), " distinct colors to choose from.")
54 70
     }
71
+    names(colors) <- n.names
55 72
     return(colors)
56 73
     
57 74
 }
... ...
@@ -15,9 +15,15 @@
15 15
 #'
16 16
 #'@examples
17 17
 #'## Make fixed-width bins of size 500kb and 1Mb
18
-#'bins <- fixedWidthBins(assembly='mm10', chromosome.format='NCBI', binsizes=c(5e5,1e6))
18
+#'data(rn4_chrominfo)
19
+#'chrom.lengths <- rn4_chrominfo$length
20
+#'names(chrom.lengths) <- rn4_chrominfo$chromosome
21
+#'bins <- fixedWidthBins(chrom.lengths=chrom.lengths, binsizes=c(5e5,1e6))
19 22
 #'bins
20 23
 #'
24
+#'## Make bins using NCBI server (requires internet connection)
25
+#'# bins <- fixedWidthBins(assembly='mm10', chromosome.format='NCBI', binsizes=c(5e5,1e6))
26
+#'
21 27
 fixedWidthBins <- function(bamfile=NULL, assembly=NULL, chrom.lengths=NULL, chromosome.format, binsizes=1e6, chromosomes=NULL) {
22 28
 
23 29
     ### Check user input ###
... ...
@@ -78,11 +84,16 @@ fixedWidthBins <- function(bamfile=NULL, assembly=NULL, chrom.lengths=NULL, chro
78 84
       
79 85
         ptm <- startTimedMessage("Making fixed-width bins for bin size ", binsize, " ...")
80 86
         chrom.lengths.floor <- floor(chrom.lengths / binsize) * binsize
81
-        bins <- unlist(GenomicRanges::tileGenome(chrom.lengths.floor[chroms2use], tilewidth=binsize), use.names=FALSE)
87
+        clfloor2use <- chrom.lengths.floor[chroms2use]
88
+        clfloor2use <- clfloor2use[clfloor2use >= binsize]
89
+        if (length(clfloor2use) == 0) {
90
+            stop("All selected chromosomes are smaller than binsize ", binsize)
91
+        }
92
+        bins <- unlist(GenomicRanges::tileGenome(clfloor2use, tilewidth=binsize), use.names=FALSE)
82 93
         if (any(width(bins)!=binsize)) {
83 94
             stop("tileGenome failed")
84 95
         }
85
-        seqlengths(bins) <- chrom.lengths[chroms2use]
96
+        seqlengths(bins) <- chrom.lengths[seqlevels(bins)]
86 97
         bins.list[[as.character(binsize)]] <- bins
87 98
 
88 99
         skipped.chroms <- setdiff(seqlevels(bins), as.character(unique(seqnames(bins))))
... ...
@@ -54,9 +54,9 @@ plotHistograms <- function(model, ...) {
54 54
 
55 55
     model <- suppressMessages( loadHmmsFromFiles(model, check.class=class.multivariate.hmm)[[1]] )
56 56
     ## Make fake uni.hmm and plot
57
-    binmapping <- dec2bin(names(model$mapping))
57
+    binmapping <- dec2bin(levels(model$bins$state), colnames=model$info$ID)
58 58
     ggplts <- list()
59
-    for (i1 in 1:length(model$info$ID)) {
59
+    for (i1 in 1:ncol(model$bins$counts)) {
60 60
         uni.hmm <- list()
61 61
         uni.hmm$info <- model$info[i1,]
62 62
         uni.hmm$bins <- model$bins
... ...
@@ -371,3 +371,71 @@ plotBoxplot <- function(model) {
371 371
 }
372 372
 
373 373
 
374
+#' Plot a genome browser view
375
+#' 
376
+#' Plot a simple genome browser view. This is useful for scripted genome browser snapshots.
377
+#' 
378
+#' @param counts A \code{\link[GenomicRanges]{GRanges}} object with meta-data column 'counts'.
379
+#' @param peaklist A named list() of \code{\link[GenomicRanges]{GRanges}} objects containing peak coordinates.
380
+#' @param chr,start,end Chromosome, start and end coordinates for the plot.
381
+#' @param countcol A character giving the color for the counts.
382
+#' @param peakcols A character vector with colors for the peaks in \code{peaklist}.
383
+#' @param style One of \code{c('peaks', 'density')}.
384
+#' @param peakTrackHeight Relative height of the tracks given in \code{peaklist} compared to the \code{counts}.
385
+#' @return A \code{\link[ggplot2:ggplot]{ggplot}} object.
386
+plotGenomeBrowser <- function(counts, peaklist=NULL, chr, start, end, countcol='black', peakcols=NULL, style='peaks', peakTrackHeight=5) {
387
+  
388
+    ## Select ranges to plot
389
+    ranges2plot <- reduce(counts[counts@seqnames == chr & start(counts) >= start & start(counts) <= end])
390
+    
391
+    ## Counts
392
+    counts <- subsetByOverlaps(counts, ranges2plot)
393
+    
394
+    if (style == 'peaks') {
395
+        # df.start <- data.frame(x=start(counts), counts=counts$counts)
396
+        # df.end <- data.frame(x=end(counts), counts=counts$counts)
397
+        # df <- rbind(df.start, df.end)
398
+        # df <- df[order(df$x),]
399
+        df <- data.frame(x=(start(counts)+end(counts))/2, counts=counts$counts) # plot triangles centered at middle of the bin
400
+        ggplt <- ggplot(df) + geom_area(aes_string(x='x', y='counts')) + theme(panel.grid = element_blank(), panel.background = element_blank(), axis.text.x = element_blank(), axis.title = element_blank(), axis.ticks.x = element_blank(), axis.line = element_blank())
401
+        maxcounts <- max(counts$counts)
402
+        ggplt <- ggplt + scale_y_continuous(breaks=c(0, maxcounts))
403
+    } else if (style == 'density') {
404
+        df <- data.frame(xmin=start(counts), xmax=end(counts), counts=counts$counts)
405
+        
406
+        # # Rolling mean
407
+        # n <- 10
408
+        # cx <- cumsum(df$counts)
409
+        # rsum <- (cx[n:length(df$counts)] - c(0, cx[1:(length(df$counts) - n)])) / n
410
+        # df$counts[(n%/%2 + 1):(length(df$counts)-(n-1)%/%2)] <- rsum
411
+        
412
+        # # Expand high peaks
413
+        # fact <- 1e-5
414
+        # df$xmin <- df$xmin - (end-start)*df$counts * fact
415
+        # df$xmax <- df$xmax + (end-start)*df$counts * fact
416
+        
417
+        ggplt <- ggplot(df) + geom_rect(aes_string(xmin='xmin', xmax='xmax', ymin=0, ymax=4, alpha='counts')) + theme(panel.grid = element_blank(), panel.background = element_blank(), axis.text = element_blank(), axis.title = element_blank(), axis.ticks = element_blank(), axis.line = element_blank())
418
+    } else {
419
+        stop("Unknown value '", style, "' for parameter 'style'. Must be one of c('peaks', 'density').")
420
+    }
421
+    
422
+    ## Peaks
423
+    if (!is.null(peaklist)) {
424
+        if (is.null(peakcols)) {
425
+            peakcols <- getDistinctColors(length(peaklist))
426
+        }
427
+        for (i1 in 1:length(peaklist)) {
428
+            p <- peakTrackHeight
429
+            peaks <- subsetByOverlaps(peaklist[[i1]], ranges2plot)
430
+            if (length(peaks) > 0) {
431
+                df <- data.frame(start=start(peaks), end=end(peaks), ymin=-p*i1, ymax=-p*i1+0.9*p)
432
+                ggplt <- ggplt + geom_rect(data=df, mapping=aes_string(xmin='start', xmax='end', ymin='ymin', ymax='ymax'), col=peakcols[i1], fill=peakcols[i1])
433
+            }
434
+            trackname <- names(peaklist)[i1]
435
+            df <- data.frame(x=start(counts)[1], y=-p*i1+0.5*p, label=trackname)
436
+            ggplt <- ggplt + geom_text(data=df, mapping=aes_string(x='x', y='y', label='label'), vjust=0.5, hjust=0.5, col=peakcols[i1])
437
+        }
438
+    }
439
+    
440
+    return(ggplt)
441
+}
374 442
\ No newline at end of file
375 443
new file mode 100644
... ...
@@ -0,0 +1,49 @@
1
+#' Print uniHMM object
2
+#' 
3
+#' @param x An \code{\link{uniHMM}} object.
4
+#' @param ... Ignored.
5
+#' @return An invisible \code{NULL}.
6
+#' @export
7
+print.uniHMM <- function(x, ...) {
8
+    
9
+    message("$info")
10
+    print(x$info)
11
+    message("\n$peaks")
12
+    print(x$peaks)
13
+    message("\nUse the list operator $ to access all elements of this object.")
14
+  
15
+}
16
+
17
+
18
+#' Print multiHMM object
19
+#' 
20
+#' @param x An \code{\link{multiHMM}} object.
21
+#' @param ... Ignored.
22
+#' @return An invisible \code{NULL}.
23
+#' @export
24
+print.multiHMM <- function(x, ...) {
25
+    
26
+    message("$info")
27
+    print(x$info)
28
+    message("\n$segments")
29
+    print(x$segments)
30
+    message("\nUse the list operator $ to access all elements of this object.")
31
+  
32
+}
33
+
34
+
35
+#' Print combinedMultiHMM object
36
+#' 
37
+#' @param x An \code{\link{combinedMultiHMM}} object.
38
+#' @param ... Ignored.
39
+#' @return An invisible \code{NULL}.
40
+#' @export
41
+print.combinedMultiHMM <- function(x, ...) {
42
+    
43
+    message("$info")
44
+    print(x$info)
45
+    message("\n$segments")
46
+    print(x$segments)
47
+    message("\nUse the list operator $ to access all elements of this object.")
48
+  
49
+}
... ...
@@ -35,10 +35,12 @@ readCustomBedFile <- function(bedfile, col.names=c('chromosome','start','end','n
35 35
     } else {
36 36
         data$strand <- '*'
37 37
     }
38
-    # Adjust chromosome format
39
-    data$chromosome <- sub('^chr', '', data$chromosome)
40
-    if (chromosome.format=='UCSC') {
41
-        data$chromosome <- paste0('chr', data$chromosome)
38
+    if (nrow(data) > 0) {
39
+        # Adjust chromosome format
40
+        data$chromosome <- sub('^chr', '', data$chromosome)
41
+        if (chromosome.format=='UCSC') {
42
+            data$chromosome <- paste0('chr', data$chromosome)
43
+        }
42 44
     }
43 45
     # Convert to GRanges object
44 46
     gr <- methods::as(data, 'GRanges')
45 47
new file mode 100644
... ...
@@ -0,0 +1,136 @@
1
+#' Remove condition from model
2
+#' 
3
+#' Remove a condition from a \code{\link{combinedMultiHMM}} object.
4
+#' 
5
+#' @param model A \code{\link{combinedMultiHMM}} object or file which contains such an object.
6
+#' @param conditions A character vector with the condition(s) to be removed.
7
+#' @return The input \code{\link{combinedMultiHMM}} object with specified conditions removed.
8
+#' @export
9
+#' @examples 
10
+#'## Get an example HMM
11
+#'file <- system.file("data","combined_mode-differential.RData",
12
+#'                     package="chromstaR")
13
+#'model <- get(load(file))
14
+#'
15
+#'## Print available conditions
16
+#'print(unique(model$info$condition))
17
+#'
18
+#'## Remove condition SHR
19
+#'new.model <- removeCondition(model, conditions='SHR')
20
+#'
21
+removeCondition <- function(model, conditions) {
22
+  
23
+    model <- loadHmmsFromFiles(model, check.class=class.combined.multivariate.hmm)[[1]]
24
+    
25
+    ### Check input ###
26
+    cond.setdiff <- setdiff(conditions, model$info$condition)
27
+    if (length(cond.setdiff) == length(conditions)) {
28
+        warning("Conditions ", paste0(cond.setdiff, collapse=', '), " could not be found. Doing nothing.")
29
+        return(model)
30
+    }
31
+    if (length(cond.setdiff) > 0) {
32
+        warning("Conditions ", paste0(cond.setdiff, collapse=', '), " could not be found.")
33
+    }
34
+    conditions <- setdiff(conditions, cond.setdiff)
35
+    
36
+    ### Info ###
37
+    info <- model$info
38
+    mask <- !info$condition %in% conditions
39
+    info <- info[mask,]
40
+    model$info <- info
41
+    
42
+    ### Bins ###
43
+    # State
44
+    if (!is.null(model$bins$state)) {
45
+        ptm <- startTimedMessage("Recalculating states ...")
46
+        states <- model$bins$state
47
+        binstates <- dec2bin(states, colnames=model$info$ID)
48
+        removeconds <- paste0(paste0('-', conditions, '-'), collapse='|')
49
+        binstates <- binstates[, grep(removeconds, colnames(binstates), invert=TRUE)]
50
+        state <- factor(bin2dec(binstates))
51
+        model$bins$state <- state
52
+        stopTimedMessage(ptm)
53
+    }
54
+    # Combinations
55
+    ptm <- startTimedMessage("Selecting conditions ...")
56
+    removeconds <- paste0('combination.', conditions)
57
+    mcols(model$bins)[removeconds] <- NULL
58
+    # Counts
59
+    if (!is.null(model$bins$counts)) {
60
+        counts <- model$bins$counts
61
+        removeconds <- paste0(paste0('-', conditions, '-'), collapse='|')
62
+        keepconds <- grep(removeconds, colnames(counts), invert=TRUE, value=TRUE)
63
+        counts <- counts[,keepconds]
64
+        if (class(counts) != "matrix") {
65
+            counts <- matrix(counts, ncol=1, dimnames=list(NULL, keepconds))
66
+        }
67
+        model$bins$counts <- counts
68
+    }
69
+    if (!is.null(model$bins$posteriors)) {
70
+        posteriors <- model$bins$posteriors
71
+        removeconds <- paste0(paste0('-', conditions, '-'), collapse='|')
72
+        keepconds <- grep(removeconds, colnames(posteriors), invert=TRUE, value=TRUE)