Browse code

more galaxy fixes, updated docs for chromstar objects

chakalakka authored on 17/02/2018 15:11:32
Showing 8 changed files

... ...
@@ -343,6 +343,7 @@ combineMultivariates <- function(hmms, mode) {
343 343
     hmm$segments.per.condition <- segments.per.condition
344 344
     hmm$peaks <- peaks
345 345
     hmm$frequencies <- freqs$table
346
+    hmm$mode <- mode
346 347
     return(hmm)
347 348
     
348 349
 }
... ...
@@ -77,13 +77,14 @@ NULL
77 77
 
78 78
 #' Univariate HMM object
79 79
 #'
80
-#' The univariate HMM object is output of the function \code{\link{callPeaksUnivariate}} and is a \code{list()} with various entries. The class() attribute of this list was set to "uniHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
80
+#' The univariate HMM object is the output of the function \code{\link{callPeaksUnivariate}} and is a \code{list()} with various entries. The \code{class()} attribute of this list was set to "uniHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
81 81
 #'
82 82
 #' @return
83 83
 #' A \code{list()} with the following entries:
84
-#' \item{ID}{An identifier that is used in various \pkg{\link{chromstaR}} functions.}
84
+#' \item{info}{Experiment table for this object.}
85
+#' \item{bincounts}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates and original binned read count values for different offsets.}
85 86
 #' \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates, their read count, (optional) posteriors and state classification.}
86
-#' \item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.}
87
+#' \item{peaks}{A \code{list()} with \code{\link[GenomicRanges]{GRanges}} containing peak coordinates for each ID in \code{info}.}
87 88
 #' \item{weights}{Weight for each component. Same as \code{apply(hmm$posteriors,2,mean)}.}
88 89
 #' \item{transitionProbs}{Matrix of transition probabilities from each state (row) into each state (column).}
89 90
 #' \item{transitionProbs.initial}{Initial \code{transitionProbs} at the beginning of the Baum-Welch.}
... ...
@@ -91,7 +92,7 @@ NULL
91 92
 #' \item{startProbs.initial}{Initial \code{startProbs} at the beginning of the Baum-Welch.}
92 93
 #' \item{distributions}{Estimated parameters of the emission distributions.}
93 94
 #' \item{distributions.initial}{Distribution parameters at the beginning of the Baum-Welch.}
94
-#' \item{post.cutoff}{Cutoff for posterior probabilities to call peaks (default=0.5).}
95
+#' \item{post.cutoff}{Cutoff for posterior probabilities to call peaks.}
95 96
 #' \item{convergenceInfo}{Contains information about the convergence of the Baum-Welch algorithm.}
96 97
 #' \item{convergenceInfo$eps}{Convergence threshold for the Baum-Welch.}
97 98
 #' \item{convergenceInfo$loglik}{Final loglikelihood after the last iteration.}
... ...
@@ -107,13 +108,15 @@ NULL
107 108
 
108 109
 #' Multivariate HMM object
109 110
 #'
110
-#' The multivariate HMM object is output of the function \code{\link{callPeaksMultivariate}} and is a \code{list()} with various entries. The class() attribute of this list was set to "multiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
111
+#' The multivariate HMM object is the output of the function \code{\link{callPeaksMultivariate}} and is a \code{list()} with various entries. The \code{class()} attribute of this list was set to "multiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
111 112
 #' 
112 113
 #' @return
113 114
 #' A \code{list()} with the following entries:
114
-#' \item{IDs}{IDs of the input univariate HMMs.}
115
+#' \item{info}{Experiment table for this object.}
116
+#' \item{bincounts}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates and original binned read count values for different offsets.}
115 117
 #' \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates, their read count, (optional) posteriors and state classification.}
116 118
 #' \item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.}
119
+#' \item{peaks}{A \code{list()} with \code{\link[GenomicRanges]{GRanges}} containing peak coordinates for each ID in \code{info}.}
117 120
 #' \item{mapping}{A named vector giving the mapping from decimal combinatorial states to human readable combinations.}
118 121
 #' \item{weights}{Weight for each component. Same as \code{apply(hmm$posteriors,2,mean)}.}
119 122
 #' \item{weights.univariate}{Weights of the univariate HMMs.}
... ...
@@ -122,7 +125,6 @@ NULL
122 125
 #' \item{startProbs}{Probabilities for the first bin. Same as \code{hmm$posteriors[1,]}.}
123 126
 #' \item{startProbs.initial}{Initial \code{startProbs} at the beginning of the Baum-Welch.}
124 127
 #' \item{distributions}{Emission distributions used for this model.}
125
-#' \item{post.cutoff}{False discovery rate. NULL means that the state with maximum posterior probability was chosen, irrespective of its absolute probability (default=NULL).}
126 128
 #' \item{convergenceInfo}{Contains information about the convergence of the Baum-Welch algorithm.}
127 129
 #' \item{convergenceInfo$eps}{Convergence threshold for the Baum-Welch.}
128 130
 #' \item{convergenceInfo$loglik}{Final loglikelihood after the last iteration.}
... ...
@@ -143,13 +145,17 @@ NULL
143 145
 
144 146
 #' Combined multivariate HMM object
145 147
 #'
146
-#' The multivariate HMM object is output of the function \code{\link{combineMultivariates}} and is a \code{list()} with various entries. The class() attribute of this list was set to "combinedMultiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
148
+#' The combined multivariate HMM object is the output of the function \code{\link{combineMultivariates}} and is a \code{list()} with various entries. The \code{class()} attribute of this list was set to "combinedMultiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
147 149
 #' 
148 150
 #' @return
149 151
 #' A \code{list()} with the following entries:
152
+#' \item{info}{Experiment table for this object.}
150 153
 #' \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing genomic bin coordinates and human readable combinations for the combined \code{\link{multiHMM}} objects.}
151 154
 #' \item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.}
152
-#' \item{segments.per.condition}{A \code{list} with segments for each condition separately.}
155
+#' \item{segments.per.condition}{A \code{list()} with segments for each condition separately.}
156
+#' \item{peaks}{A \code{list()} with \code{\link[GenomicRanges]{GRanges}} containing peak coordinates for each ID in \code{info}.}
157
+#' \item{frequencies}{Genomic frequencies of combinations.}
158
+#' \item{mode}{Mode of analysis, i.e. the value of the \code{mode} argument that was passed to \code{\link{combineMultivariates}}.}
153 159
 #' @seealso \code{\link{combineMultivariates}}, \code{\link{uniHMM}}, \code{\link{multiHMM}}
154 160
 #' @name combinedMultiHMM
155 161
 #' @aliases combinedHMM
... ...
@@ -260,7 +260,7 @@ plotFoldEnrichHeatmap <- function(hmm, annotations, what="combinations", combina
260 260
 #' @importFrom reshape2 melt
261 261
 #' @importFrom IRanges subsetByOverlaps
262 262
 #' @export
263
-plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, max.rows=1000, combinations=NULL, colorByCombinations=TRUE, sortByCombinations=TRUE, sortByColumns=NULL) {
263
+plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000, max.rows=1000, combinations=NULL, colorByCombinations=sortByCombinations, sortByCombinations=is.null(sortByColumns), sortByColumns=NULL) {
264 264
 
265 265
     if (!is.null(sortByColumns)) {
266 266
         sortByCombinations <- FALSE
... ...
@@ -391,7 +391,13 @@ plotEnrichCountHeatmap <- function(hmm, annotation, bp.around.annotation=10000,
391 391
     ggplt <- ggplt + scale_fill_continuous(trans='log1p', low='white', high='black')
392 392
     if (sortByCombinations) {
393 393
         # Insert horizontal lines
394
-        y.lines <- sapply(split(df$id, df$combination), function(x) { max(as.integer(x)) })
394
+        y.lines <- sapply(split(df$id, df$combination), function(x) { 
395
+          y <- -Inf
396
+          if (length(x)>0) {
397
+            y <- max(as.integer(x))
398
+          }
399
+          return(y)
400
+        })
395 401
         df.lines <- data.frame(y=sort(y.lines[-1]) + 0.5)
396 402
         ggplt <- ggplt + geom_hline(data=df.lines, mapping=aes_string(yintercept='y'), linetype=2)
397 403
     }
... ...
@@ -591,7 +597,7 @@ enrichmentAtAnnotation <- function(bins, info, annotation, bp.around.annotation=
591 597
     seqlevels.only.in.bins <- setdiff(seqlevels(bins), seqlevels(annotation))
592 598
     seqlevels.only.in.annotation <- setdiff(seqlevels(annotation), seqlevels(bins))
593 599
     if (length(seqlevels.only.in.bins) > 0 | length(seqlevels.only.in.annotation) > 0) {
594
-        warning("Sequence levels in 'bins' but not in 'annotation': ", paste0(seqlevels.only.in.bins, collapse = ', '), "\n  Sequence levels in 'annotation' but not in 'bins': ", paste0(seqlevels.only.in.annotation, collapse = ''))
600
+        warning("Sequence levels in 'bins' but not in 'annotation': ", paste0(seqlevels.only.in.bins, collapse = ', '), "\n  Sequence levels in 'annotation' but not in 'bins': ", paste0(seqlevels.only.in.annotation, collapse = ', '))
595 601
     }
596 602
   
597 603
     ## Variables
598 604
new file mode 100644
... ...
@@ -0,0 +1,12 @@
1
+opt <- list()
2
+opt$chromstarObject <- '~/bioinformatics/galaxy/tools/chromstaR-galaxy/test-data/output_chromstaR-result.rdata'
3
+opt$annotationBed6 <- '~/bioinformatics/galaxy/tools/chromstaR-galaxy/test-data/rn4_protein_coding_genes.bed'
4
+opt$bpAroundAnnotation <- 10000
5
+opt$numIntervals <- 20
6
+opt$statistic <- "fold"
7
+opt$numLoci <- 1000
8
+
9
+opt <- list()
10
+opt$chromstarObject <- '~/bioinformatics/galaxy/tools/chromstaR-galaxy/test-data/output_chromstaR-result.rdata'
11
+opt$changeWhat <- 'changePostCutoff'
12
+opt$cutoff <- 0.9999999
... ...
@@ -6,12 +6,16 @@
6 6
 \title{Combined multivariate HMM object}
7 7
 \value{
8 8
 A \code{list()} with the following entries:
9
+\item{info}{Experiment table for this object.}
9 10
 \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing genomic bin coordinates and human readable combinations for the combined \code{\link{multiHMM}} objects.}
10 11
 \item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.}
11
-\item{segments.per.condition}{A \code{list} with segments for each condition separately.}
12
+\item{segments.per.condition}{A \code{list()} with segments for each condition separately.}
13
+\item{peaks}{A \code{list()} with \code{\link[GenomicRanges]{GRanges}} containing peak coordinates for each ID in \code{info}.}
14
+\item{frequencies}{Genomic frequencies of combinations.}
15
+\item{mode}{Mode of analysis, i.e. the value of the \code{mode} argument that was passed to \code{\link{combineMultivariates}}.}
12 16
 }
13 17
 \description{
14
-The multivariate HMM object is output of the function \code{\link{combineMultivariates}} and is a \code{list()} with various entries. The class() attribute of this list was set to "combinedMultiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
18
+The combined multivariate HMM object is the output of the function \code{\link{combineMultivariates}} and is a \code{list()} with various entries. The \code{class()} attribute of this list was set to "combinedMultiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
15 19
 }
16 20
 \seealso{
17 21
 \code{\link{combineMultivariates}}, \code{\link{uniHMM}}, \code{\link{multiHMM}}
... ...
@@ -11,8 +11,9 @@ plotFoldEnrichHeatmap(hmm, annotations, what = "combinations",
11 11
   combinations = NULL, marks = NULL, plot = TRUE, logscale = TRUE)
12 12
 
13 13
 plotEnrichCountHeatmap(hmm, annotation, bp.around.annotation = 10000,
14
-  max.rows = 1000, combinations = NULL, colorByCombinations = TRUE,
15
-  sortByCombinations = TRUE, sortByColumns = NULL)
14
+  max.rows = 1000, combinations = NULL,
15
+  colorByCombinations = sortByCombinations,
16
+  sortByCombinations = is.null(sortByColumns), sortByColumns = NULL)
16 17
 
17 18
 plotEnrichment(hmm, annotation, bp.around.annotation = 10000,
18 19
   region = c("start", "inside", "end"), num.intervals = 20,
... ...
@@ -6,9 +6,11 @@
6 6
 \title{Multivariate HMM object}
7 7
 \value{
8 8
 A \code{list()} with the following entries:
9
-\item{IDs}{IDs of the input univariate HMMs.}
9
+\item{info}{Experiment table for this object.}
10
+\item{bincounts}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates and original binned read count values for different offsets.}
10 11
 \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates, their read count, (optional) posteriors and state classification.}
11 12
 \item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.}
13
+\item{peaks}{A \code{list()} with \code{\link[GenomicRanges]{GRanges}} containing peak coordinates for each ID in \code{info}.}
12 14
 \item{mapping}{A named vector giving the mapping from decimal combinatorial states to human readable combinations.}
13 15
 \item{weights}{Weight for each component. Same as \code{apply(hmm$posteriors,2,mean)}.}
14 16
 \item{weights.univariate}{Weights of the univariate HMMs.}
... ...
@@ -17,7 +19,6 @@ A \code{list()} with the following entries:
17 19
 \item{startProbs}{Probabilities for the first bin. Same as \code{hmm$posteriors[1,]}.}
18 20
 \item{startProbs.initial}{Initial \code{startProbs} at the beginning of the Baum-Welch.}
19 21
 \item{distributions}{Emission distributions used for this model.}
20
-\item{post.cutoff}{False discovery rate. NULL means that the state with maximum posterior probability was chosen, irrespective of its absolute probability (default=NULL).}
21 22
 \item{convergenceInfo}{Contains information about the convergence of the Baum-Welch algorithm.}
22 23
 \item{convergenceInfo$eps}{Convergence threshold for the Baum-Welch.}
23 24
 \item{convergenceInfo$loglik}{Final loglikelihood after the last iteration.}
... ...
@@ -27,7 +28,7 @@ A \code{list()} with the following entries:
27 28
 \item{correlation.matrix}{Correlation matrix of transformed reads.}
28 29
 }
29 30
 \description{
30
-The multivariate HMM object is output of the function \code{\link{callPeaksMultivariate}} and is a \code{list()} with various entries. The class() attribute of this list was set to "multiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
31
+The multivariate HMM object is the output of the function \code{\link{callPeaksMultivariate}} and is a \code{list()} with various entries. The \code{class()} attribute of this list was set to "multiHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
31 32
 }
32 33
 \examples{
33 34
 ## Get an example multiHMM
... ...
@@ -6,9 +6,10 @@
6 6
 \title{Univariate HMM object}
7 7
 \value{
8 8
 A \code{list()} with the following entries:
9
-\item{ID}{An identifier that is used in various \pkg{\link{chromstaR}} functions.}
9
+\item{info}{Experiment table for this object.}
10
+\item{bincounts}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates and original binned read count values for different offsets.}
10 11
 \item{bins}{A \code{\link[GenomicRanges]{GRanges}} object containing the genomic bin coordinates, their read count, (optional) posteriors and state classification.}
11
-\item{segments}{Same as \code{bins}, but consecutive bins with the same state are collapsed into segments.}
12
+\item{peaks}{A \code{list()} with \code{\link[GenomicRanges]{GRanges}} containing peak coordinates for each ID in \code{info}.}
12 13
 \item{weights}{Weight for each component. Same as \code{apply(hmm$posteriors,2,mean)}.}
13 14
 \item{transitionProbs}{Matrix of transition probabilities from each state (row) into each state (column).}
14 15
 \item{transitionProbs.initial}{Initial \code{transitionProbs} at the beginning of the Baum-Welch.}
... ...
@@ -16,7 +17,7 @@ A \code{list()} with the following entries:
16 17
 \item{startProbs.initial}{Initial \code{startProbs} at the beginning of the Baum-Welch.}
17 18
 \item{distributions}{Estimated parameters of the emission distributions.}
18 19
 \item{distributions.initial}{Distribution parameters at the beginning of the Baum-Welch.}
19
-\item{post.cutoff}{Cutoff for posterior probabilities to call peaks (default=0.5).}
20
+\item{post.cutoff}{Cutoff for posterior probabilities to call peaks.}
20 21
 \item{convergenceInfo}{Contains information about the convergence of the Baum-Welch algorithm.}
21 22
 \item{convergenceInfo$eps}{Convergence threshold for the Baum-Welch.}
22 23
 \item{convergenceInfo$loglik}{Final loglikelihood after the last iteration.}
... ...
@@ -27,7 +28,7 @@ A \code{list()} with the following entries:
27 28
 \item{convergenceInfo$read.cutoff}{Cutoff value for read counts.}
28 29
 }
29 30
 \description{
30
-The univariate HMM object is output of the function \code{\link{callPeaksUnivariate}} and is a \code{list()} with various entries. The class() attribute of this list was set to "uniHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
31
+The univariate HMM object is the output of the function \code{\link{callPeaksUnivariate}} and is a \code{list()} with various entries. The \code{class()} attribute of this list was set to "uniHMM". For a given hmm, the entries can be accessed with the list operators 'hmm[[]]' or 'hmm$'.
31 32
 }
32 33
 \seealso{
33 34
 \code{\link{callPeaksUnivariate}}, \code{\link{multiHMM}}, \code{\link{combinedMultiHMM}}