Browse code

adjustments for chromHMM compatibility

ataudt authored on 16/07/2018 09:41:54
Showing 4 changed files

... ...
@@ -115,6 +115,8 @@ heatmapCountCorrelation <- function(model, cluster=TRUE) {
115 115
 #' Plot a heatmap of transition probabilities for a \code{\link{multiHMM}} model.
116 116
 #'
117 117
 #' @param model A \code{\link{multiHMM}} object or file that contains such an object.
118
+#' @param reorder.states Whether or not to reorder the states.
119
+#' @param transitionProbs A matrix with transition probabilities where \code{dimnames(emissionProbs)} gives the state labels. This option is helpful to plot transition probabilities directly without needing a \code{\link{chromstaR-objects}}. If specified, \code{model} will be ignored.
118 120
 #' @return A \code{\link[ggplot2]{ggplot}} object.
119 121
 #' @importFrom reshape2 melt
120 122
 #' @seealso \code{\link{plotting}}
... ...
@@ -125,14 +127,31 @@ heatmapCountCorrelation <- function(model, cluster=TRUE) {
125 127
 #'                     package="chromstaR")
126 128
 #'model <- get(load(file))
127 129
 #'## Plot transition probabilites as heatmap
128
-#'heatmapTransitionProbs(model)
130
+#'heatmapTransitionProbs(model, reorder.states=TRUE)
129 131
 #'
130
-heatmapTransitionProbs <- function(model) {
131
-
132
-    model <- suppressMessages( loadHmmsFromFiles(model, check.class=class.multivariate.hmm)[[1]] )
133
-    A <- reshape2::melt(model$transitionProbs, varnames=c('from','to'), value.name='prob')
134
-    A$from <- factor(A$from, levels=stateorderByTransition(model))
135
-    A$to <- factor(A$to, levels=stateorderByTransition(model))
132
+heatmapTransitionProbs <- function(model=NULL, reorder.states=TRUE, transitionProbs=NULL) {
133
+
134
+    if (is.null(transitionProbs)) {
135
+        model <- suppressMessages( loadHmmsFromFiles(model, check.class=c(class.multivariate.hmm, class.combined.multivariate.hmm))[[1]] )
136
+        transitionProbs <- model$transitionProbs
137
+        A <- reshape2::melt(transitionProbs, varnames=c('from','to'), value.name='prob')
138
+        if (reorder.states) {
139
+            A$from <- factor(A$from, levels=stateorderByTransition(transitionProbs))
140
+            A$to <- factor(A$to, levels=stateorderByTransition(transitionProbs))
141
+        } else {
142
+            A$from <- factor(A$from, levels=levels(model$bins$combination))
143
+            A$to <- factor(A$to, levels=levels(model$bins$combination))
144
+        }
145
+    } else {
146
+        A <- reshape2::melt(transitionProbs, varnames=c('from','to'), value.name='prob')
147
+        if (reorder.states) {
148
+            A$from <- factor(A$from, levels=stateorderByTransition(transitionProbs))
149
+            A$to <- factor(A$to, levels=stateorderByTransition(transitionProbs))
150
+        } else {
151
+            A$from <- factor(A$from, levels=colnames(transitionProbs))
152
+            A$to <- factor(A$to, levels=colnames(transitionProbs))
153
+        }
154
+    }
136 155
     ggplt <- ggplot(data=A) + geom_tile(aes_string(x='to', y='from', fill='prob')) + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) + scale_fill_gradient(low="white", high="blue")
137 156
 
138 157
     return(ggplt)
... ...
@@ -1,20 +1,18 @@
1
-stateorderByTransition <- function(multi.hmm) {
2
-
3
-    multi.hmm <- loadHmmsFromFiles(multi.hmm, check.class=class.multivariate.hmm)[[1]]
1
+stateorderByTransition <- function(transitionProbs) {
4 2
 
5 3
     ## Calculate distance matrix
6
-    distances <- matrix(NA, ncol=ncol(multi.hmm$transitionProbs), nrow=nrow(multi.hmm$transitionProbs))
7
-    colnames(distances) <- colnames(multi.hmm$transitionProbs)
8
-    rownames(distances) <- rownames(multi.hmm$transitionProbs)
4
+    distances <- matrix(NA, ncol=ncol(transitionProbs), nrow=nrow(transitionProbs))
5
+    colnames(distances) <- colnames(transitionProbs)
6
+    rownames(distances) <- rownames(transitionProbs)
9 7
     for (irow in 1:nrow(distances)) {
10 8
         for (icol in 1:ncol(distances)) {
11
-            distances[irow,icol] <- 2 - (multi.hmm$transitionProbs[irow,icol] + multi.hmm$transitionProbs[icol,irow])
12
-#             distances[irow,icol] <- abs(multi.hmm$transitionProbs[irow,icol] - multi.hmm$transitionProbs[icol,irow])
9
+            distances[irow,icol] <- 2 - (transitionProbs[irow,icol] + transitionProbs[icol,irow])
10
+#             distances[irow,icol] <- abs(transitionProbs[irow,icol] - transitionProbs[icol,irow])
13 11
         }
14 12
     }
15 13
 
16 14
     ## Select ordering
17
-    comb.states <- colnames(multi.hmm$transitionProbs)
15
+    comb.states <- colnames(transitionProbs)
18 16
     stateorders <- matrix(NA, ncol=length(comb.states), nrow=length(comb.states))
19 17
     total.distance <- rep(0, length(comb.states))
20 18
     for (i1 in 1:length(comb.states)) {
... ...
@@ -30,8 +28,6 @@ stateorderByTransition <- function(multi.hmm) {
30 28
         }
31 29
     }
32 30
     stateorder <- stateorders[which.min(total.distance),]
33
-
34
-    ## Reorder the multi.hmm
35 31
     return(stateorder)
36 32
 
37 33
 }
... ...
@@ -4,10 +4,15 @@
4 4
 \alias{heatmapTransitionProbs}
5 5
 \title{Heatmap of transition probabilities}
6 6
 \usage{
7
-heatmapTransitionProbs(model)
7
+heatmapTransitionProbs(model = NULL, reorder.states = TRUE,
8
+  transitionProbs = NULL)
8 9
 }
9 10
 \arguments{
10 11
 \item{model}{A \code{\link{multiHMM}} object or file that contains such an object.}
12
+
13
+\item{reorder.states}{Whether or not to reorder the states.}
14
+
15
+\item{transitionProbs}{A matrix with transition probabilities where \code{dimnames(emissionProbs)} gives the state labels. This option is helpful to plot transition probabilities directly without needing a \code{\link{chromstaR-objects}}. If specified, \code{model} will be ignored.}
11 16
 }
12 17
 \value{
13 18
 A \code{\link[ggplot2]{ggplot}} object.
... ...
@@ -21,7 +26,7 @@ file <- system.file("data","multivariate_mode-combinatorial_condition-SHR.RData"
21 26
                     package="chromstaR")
22 27
 model <- get(load(file))
23 28
 ## Plot transition probabilites as heatmap
24
-heatmapTransitionProbs(model)
29
+heatmapTransitionProbs(model, reorder.states=TRUE)
25 30
 
26 31
 }
27 32
 \seealso{
28 33
new file mode 100644
... ...
@@ -0,0 +1,70 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/importOtherSegmentations.R
3
+\name{importChromHMMsegmentation}
4
+\alias{importChromHMMsegmentation}
5
+\title{Import ChromHMM segmentation}
6
+\usage{
7
+importChromHMMsegmentation(inputfolder, outputfolder, assembly, binsize,
8
+  experiment.table, chrom.lengths = NULL, chromosomes = NULL,
9
+  chromosome.format = "UCSC")
10
+}
11
+\arguments{
12
+\item{inputfolder}{Folder with ChromHMM output files (segmentation, emission and transition probabilities).}
13
+
14
+\item{outputfolder}{Output folder for the converted \code{\link{chromstaR-objects}}.}
15
+
16
+\item{assembly}{An assembly from which the chromosome lengths are determined. Please see \code{\link[GenomeInfoDb]{fetchExtendedChromInfoFromUCSC}} for available assemblies. Alternatively a data.frame generated by \code{\link[GenomeInfoDb]{fetchExtendedChromInfoFromUCSC}}.}
17
+
18
+\item{binsize}{The bin size in base pairs used in ChromHMM.}
19
+
20
+\item{experiment.table}{A \code{data.frame} or tab-separated text file with the structure of the experiment. See \code{\link{experiment.table}} for an example.}
21
+
22
+\item{chrom.lengths}{A named character vector with chromosome lengths. Names correspond to chromosomes.}
23
+
24
+\item{chromosomes}{A subset of chromosomes for which the bins are generated.}
25
+
26
+\item{chromosome.format}{A character specifying the format of the chromosomes if \code{assembly} is specified. Either 'NCBI' for (1,2,3 ...) or 'UCSC' for (chr1,chr2,chr3 ...).}
27
+}
28
+\value{
29
+A \code{\link{combinedMultiHMM}} object. Intermediate \code{\link{multiHMM}} objects are saved to \code{outputfolder}.
30
+}
31
+\description{
32
+Import a chromatin state segmentation produced by ChromHMM as \code{\link{chromstaR-objects}}. This is useful to perform comparative analyses or simply use chromstaR plotting or enrichment functions on a ChromHMM segmentation.
33
+}
34
+\details{
35
+The function takes the *segments.bed files, emissions_*.txt and transitions_*.txt files in \code{inputfolder}, and converts them into \code{\link{multiHMM}} objects for each condition that was specified in the \code{experiment.table}. All \code{\link{multiHMM}} objects are then combined into a \code{\link{combinedMultiHMM}} object and returned.
36
+}
37
+\examples{
38
+inputfolder <- "differential_CD4_numstates_16_binsize_1000bp"
39
+outputfolder <- "testconvertChromHMM2chromstar"
40
+assembly <- "mm9"
41
+binsize <- 1000
42
+chrom.lengths <- NULL
43
+chromosomes <- paste0("chr", c(1:19, "X"))
44
+chromosome.format <- "UCSC"
45
+experiment.table <- "differential_CD4_numstates_16_binsize_1000bp/experiment_table_differential_CD4.tsv"
46
+model <- importChromHMMsegmentation(inputfolder, outputfolder, assembly, binsize, experiment.table, chrom.lengths, chromosomes, chromosome.format)
47
+
48
+### Plot transition and emission probabilities of ChromHMM model ###
49
+heatmapTransitionProbs(reorder.states=FALSE, transitionProbs=model$transitionProbs)
50
+heatmapCombinations(emissionProbs = model$emissionProbs)
51
+
52
+### Use plotEnrichment function on imported ChromHMM segmentation ###
53
+library(biomaRt)
54
+ensembl <- useMart('ENSEMBL_MART_ENSEMBL', host='may2012.archive.ensembl.org',
55
+                  dataset='mmusculus_gene_ensembl')
56
+genes <- getBM(attributes=c('ensembl_gene_id', 'chromosome_name', 'start_position',
57
+                           'end_position', 'strand', 'external_gene_id',
58
+                           'gene_biotype'),
59
+              mart=ensembl)
60
+# Transform to GRanges for easier handling
61
+genes <- GRanges(seqnames=paste0('chr',genes$chromosome_name),
62
+                ranges=IRanges(start=genes$start, end=genes$end),
63
+                strand=genes$strand,
64
+                name=genes$external_gene_id, biotype=genes$gene_biotype)
65
+print(genes)
66
+
67
+ggplts <- plotEnrichment(model, annotation=genes)
68
+ggplts[[1]] + facet_wrap(~combination)
69
+
70
+}