Browse code

improved error and warning handling

chakalakka authored on 06/12/2014 11:14:36
Showing 4 changed files

... ...
@@ -32,7 +32,7 @@ univariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max
32 32
 	if (check.logical(use.gc.corrected.reads)!=0) stop("argument 'use.gc.corrected.reads' expects a logical (TRUE or FALSE)")
33 33
 	if (check.strand(strand)!=0) stop("argument 'strand' expects either '+', '-' or '*'")
34 34
 
35
-	war <- NULL
35
+	warlist <- list()
36 36
 	if (is.null(eps.try)) eps.try <- eps
37 37
 
38 38
 	## Assign variables
... ...
@@ -48,17 +48,26 @@ univariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max
48 48
 		select <- 'reads'
49 49
 	}
50 50
 	if (use.gc.corrected.reads) {
51
-		if (paste0(select,'.gc') %in% names(mcols(binned.data))) {
52
-			select <- paste0(select,'.gc')
51
+		if (!(paste0(select,'.gc') %in% names(mcols(binned.data)))) {
52
+			warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": Cannot use GC-corrected reads because they are not in the binned data. Continuing without GC-correction."))
53
+		} else if (any(is.na(mcols(binned.data)[,paste0(select,'.gc')]))) {
54
+			warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": Cannot use GC-corrected reads because there are NAs. GC-correction may not be reliable. Continuing without GC-correction."))
53 55
 		} else {
54
-			warning("Cannot use GC-corrected reads because they are not in the binned data. Continuing without GC-correction.")
56
+			select <- paste0(select,'.gc')
55 57
 		}
56 58
 	}
57 59
 	reads <- mcols(binned.data)[,select]
58 60
 
59 61
 	# Check if there are reads in the data, otherwise HMM will blow up
60 62
 	if (!any(reads!=0)) {
61
-		stop("All reads in data are zero. No HMM done.")
63
+		warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": All reads in data are zero. No HMM done."))
64
+		## Make return object
65
+		result <- list()
66
+		class(result) <- class.univariate.hmm
67
+		result$ID <- ID
68
+		result$bins <- binned.data
69
+		result$warnings <- warlist
70
+		return(result)
62 71
 	}
63 72
 
64 73
 	# Filter high reads out, makes HMM faster
... ...
@@ -141,7 +150,7 @@ univariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max
141 150
 		hmm$eps <- eps.try
142 151
 		if (num.trials > 1) {
143 152
 			if (hmm$loglik.delta > hmm$eps) {
144
-				warning("HMM did not converge in trial run ",i_try,"!\n")
153
+				warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": HMM did not converge in trial run ",i_try,"!\n"))
145 154
 			}
146 155
 			# Store model in list
147 156
 			hmm$reads <- NULL
... ...
@@ -152,6 +161,10 @@ univariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max
152 161
 				break
153 162
 			}
154 163
 			init <- 'random'
164
+		} else if (num.trials == 1) {
165
+			if (hmm$loglik.delta > eps) {
166
+				warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": HMM did not converge!\n"))
167
+			}
155 168
 		}
156 169
 	}
157 170
 
... ...
@@ -197,88 +210,89 @@ univariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max
197 210
 
198 211
 	### Make return object ###
199 212
 		result <- list()
213
+		class(result) <- class.univariate.hmm
200 214
 		result$ID <- ID
201
-	## Bin coordinates and states ###
202 215
 		result$bins <- binned.data
203
-		result$bins$state <- state.labels[hmm$states]
204
-	## Segmentation
205
-		cat("Making segmentation ...")
206
-		ptm <- proc.time()
207
-		gr <- result$bins
208
-		red.gr.list <- GRangesList()
209
-		for (state in state.labels) {
210
-			red.gr <- GenomicRanges::reduce(gr[gr$state==state])
211
-			mcols(red.gr)$state <- rep(factor(state, levels=levels(state.labels)),length(red.gr))
212
-			red.gr.list[[length(red.gr.list)+1]] <- red.gr
213
-		}
214
-		red.gr <- GenomicRanges::sort(GenomicRanges::unlist(red.gr.list))
215
-		result$segments <- red.gr
216
-		seqlengths(result$segments) <- seqlengths(binned.data)
217
-		time <- proc.time() - ptm
218
-		cat(paste0(" ",round(time[3],2),"s\n"))
219
-	## Parameters
220
-		# Weights
221
-		result$weights <- hmm$weights
222
-		names(result$weights) <- state.labels
223
-		# Transition matrices
224
-		transitionProbs <- matrix(hmm$A, ncol=hmm$num.states)
225
-		rownames(transitionProbs) <- state.labels
226
-		colnames(transitionProbs) <- state.labels
227
-		result$transitionProbs <- transitionProbs
228
-		transitionProbs.initial <- matrix(hmm$A.initial, ncol=hmm$num.states)
229
-		rownames(transitionProbs.initial) <- state.labels
230
-		colnames(transitionProbs.initial) <- state.labels
231
-		result$transitionProbs.initial <- transitionProbs.initial
232
-		# Initial probs
233
-		result$startProbs <- hmm$proba
234
-		names(result$startProbs) <- paste0("P(",state.labels,")")
235
-		result$startProbs.initial <- hmm$proba.initial
236
-		names(result$startProbs.initial) <- paste0("P(",state.labels,")")
237
-		# Distributions
238
-			distributions <- data.frame()
239
-			distributions.initial <- data.frame()
240
-			for (idistr in 1:length(hmm$distr.type)) {
241
-				distr <- levels(state.distributions)[hmm$distr.type[idistr]]
242
-				if (distr == 'dnbinom') {
243
-					distributions <- rbind(distributions, data.frame(type=distr, size=hmm$size[idistr], prob=hmm$prob[idistr], lambda=NA, mu=dnbinom.mean(hmm$size[idistr],hmm$prob[idistr]), variance=dnbinom.variance(hmm$size[idistr],hmm$prob[idistr])))
244
-					distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=hmm$size.initial[idistr], prob=hmm$prob.initial[idistr], lambda=NA, mu=dnbinom.mean(hmm$size.initial[idistr],hmm$prob.initial[idistr]), variance=dnbinom.variance(hmm$size.initial[idistr],hmm$prob.initial[idistr])))
245
-				} else if (distr == 'dgeom') {
246
-					distributions <- rbind(distributions, data.frame(type=distr, size=NA, prob=hmm$prob[idistr], lambda=NA, mu=dgeom.mean(hmm$prob[idistr]), variance=dgeom.variance(hmm$prob[idistr])))
247
-					distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=NA, prob=hmm$prob.initial[idistr], lambda=NA, mu=dgeom.mean(hmm$prob.initial[idistr]), variance=dgeom.variance(hmm$prob.initial[idistr])))
248
-				} else if (distr == 'delta') {
249
-					distributions <- rbind(distributions, data.frame(type=distr, size=NA, prob=NA, lambda=NA, mu=0, variance=0))
250
-					distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=NA, prob=NA, lambda=NA, mu=0, variance=0))
251
-				} else if (distr == 'dpois') {
252
-					distributions <- rbind(distributions, data.frame(type=distr, size=NA, prob=NA, lambda=hmm$lambda[idistr], mu=hmm$lambda[idistr], variance=hmm$lambda[idistr]))
253
-					distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=NA, prob=NA, lambda=hmm$lambda.initial[idistr], mu=hmm$lambda.initial[idistr], variance=hmm$lambda.initial[idistr]))
254
-				} else if (distr == 'dbinom') {
255
-					distributions <- rbind(distributions, data.frame(type=distr, size=hmm$size[idistr], prob=hmm$prob[idistr], lambda=NA, mu=dbinom.mean(hmm$size[idistr],hmm$prob[idistr]), variance=dbinom.variance(hmm$size[idistr],hmm$prob[idistr])))
256
-					distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=hmm$size.initial[idistr], prob=hmm$prob.initial[idistr], lambda=NA, mu=dbinom.mean(hmm$size.initial[idistr],hmm$prob.initial[idistr]), variance=dbinom.variance(hmm$size.initial[idistr],hmm$prob.initial[idistr])))
216
+	## Check for errors
217
+		if (hmm$error == 0) {
218
+		## Bin coordinates and states ###
219
+			result$bins$state <- state.labels[hmm$states]
220
+		## Segmentation
221
+			cat("Making segmentation ...")
222
+			ptm <- proc.time()
223
+			gr <- result$bins
224
+			red.gr.list <- GRangesList()
225
+			for (state in state.labels) {
226
+				red.gr <- GenomicRanges::reduce(gr[gr$state==state])
227
+				mcols(red.gr)$state <- rep(factor(state, levels=levels(state.labels)),length(red.gr))
228
+				if (length(red.gr) > 0) {
229
+					red.gr.list[[length(red.gr.list)+1]] <- red.gr
257 230
 				}
258 231
 			}
259
-			rownames(distributions) <- state.labels
260
-			rownames(distributions.initial) <- state.labels
261
-			result$distributions <- distributions
262
-			result$distributions.initial <- distributions
263
-	## Convergence info
264
-		convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec)
265
-		result$convergenceInfo <- convergenceInfo
266
-	## Add class
267
-		class(result) <- class.univariate.hmm
268
-
269
-	### Issue warnings ###
270
-	if (num.trials == 1) {
271
-		if (hmm$loglik.delta > eps) {
272
-			war <- warning("HMM did not converge!\n")
232
+			red.gr <- GenomicRanges::sort(GenomicRanges::unlist(red.gr.list))
233
+			result$segments <- red.gr
234
+			seqlengths(result$segments) <- seqlengths(binned.data)
235
+			time <- proc.time() - ptm
236
+			cat(paste0(" ",round(time[3],2),"s\n"))
237
+		## Parameters
238
+			# Weights
239
+			result$weights <- hmm$weights
240
+			names(result$weights) <- state.labels
241
+			# Transition matrices
242
+			transitionProbs <- matrix(hmm$A, ncol=hmm$num.states)
243
+			rownames(transitionProbs) <- state.labels
244
+			colnames(transitionProbs) <- state.labels
245
+			result$transitionProbs <- transitionProbs
246
+			transitionProbs.initial <- matrix(hmm$A.initial, ncol=hmm$num.states)
247
+			rownames(transitionProbs.initial) <- state.labels
248
+			colnames(transitionProbs.initial) <- state.labels
249
+			result$transitionProbs.initial <- transitionProbs.initial
250
+			# Initial probs
251
+			result$startProbs <- hmm$proba
252
+			names(result$startProbs) <- paste0("P(",state.labels,")")
253
+			result$startProbs.initial <- hmm$proba.initial
254
+			names(result$startProbs.initial) <- paste0("P(",state.labels,")")
255
+			# Distributions
256
+				distributions <- data.frame()
257
+				distributions.initial <- data.frame()
258
+				for (idistr in 1:length(hmm$distr.type)) {
259
+					distr <- levels(state.distributions)[hmm$distr.type[idistr]]
260
+					if (distr == 'dnbinom') {
261
+						distributions <- rbind(distributions, data.frame(type=distr, size=hmm$size[idistr], prob=hmm$prob[idistr], lambda=NA, mu=dnbinom.mean(hmm$size[idistr],hmm$prob[idistr]), variance=dnbinom.variance(hmm$size[idistr],hmm$prob[idistr])))
262
+						distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=hmm$size.initial[idistr], prob=hmm$prob.initial[idistr], lambda=NA, mu=dnbinom.mean(hmm$size.initial[idistr],hmm$prob.initial[idistr]), variance=dnbinom.variance(hmm$size.initial[idistr],hmm$prob.initial[idistr])))
263
+					} else if (distr == 'dgeom') {
264
+						distributions <- rbind(distributions, data.frame(type=distr, size=NA, prob=hmm$prob[idistr], lambda=NA, mu=dgeom.mean(hmm$prob[idistr]), variance=dgeom.variance(hmm$prob[idistr])))
265
+						distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=NA, prob=hmm$prob.initial[idistr], lambda=NA, mu=dgeom.mean(hmm$prob.initial[idistr]), variance=dgeom.variance(hmm$prob.initial[idistr])))
266
+					} else if (distr == 'delta') {
267
+						distributions <- rbind(distributions, data.frame(type=distr, size=NA, prob=NA, lambda=NA, mu=0, variance=0))
268
+						distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=NA, prob=NA, lambda=NA, mu=0, variance=0))
269
+					} else if (distr == 'dpois') {
270
+						distributions <- rbind(distributions, data.frame(type=distr, size=NA, prob=NA, lambda=hmm$lambda[idistr], mu=hmm$lambda[idistr], variance=hmm$lambda[idistr]))
271
+						distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=NA, prob=NA, lambda=hmm$lambda.initial[idistr], mu=hmm$lambda.initial[idistr], variance=hmm$lambda.initial[idistr]))
272
+					} else if (distr == 'dbinom') {
273
+						distributions <- rbind(distributions, data.frame(type=distr, size=hmm$size[idistr], prob=hmm$prob[idistr], lambda=NA, mu=dbinom.mean(hmm$size[idistr],hmm$prob[idistr]), variance=dbinom.variance(hmm$size[idistr],hmm$prob[idistr])))
274
+						distributions.initial <- rbind(distributions.initial, data.frame(type=distr, size=hmm$size.initial[idistr], prob=hmm$prob.initial[idistr], lambda=NA, mu=dbinom.mean(hmm$size.initial[idistr],hmm$prob.initial[idistr]), variance=dbinom.variance(hmm$size.initial[idistr],hmm$prob.initial[idistr])))
275
+					}
276
+				}
277
+				rownames(distributions) <- state.labels
278
+				rownames(distributions.initial) <- state.labels
279
+				result$distributions <- distributions
280
+				result$distributions.initial <- distributions
281
+		## Convergence info
282
+			convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec, error=hmm$error)
283
+			result$convergenceInfo <- convergenceInfo
284
+		## GC correction
285
+			result$gc.correction <- grepl('gc', select)
286
+		} else if (hmm$error == 1) {
287
+			warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": A NaN occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library! The following factors are known to cause this error: 1) Your read counts contain very high numbers. Try again with a lower value for 'read.cutoff.quantile'. 2) Your library contains too few reads in each bin. 3) Your library contains reads for a different genome than it was aligned to."))
288
+		} else if (hmm$error == 2) {
289
+			warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": An error occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library"))
273 290
 		}
274
-	}
275
-	if (hmm$error == 1) {
276
-		stop("A nan occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your read counts for very high numbers, they could be the cause for this problem. Try again with a lower value for 'read.cutoff.quantile'.")
277
-	} else if (hmm$error == 2) {
278
-		stop("An error occurred during the Baum-Welch! Parameter estimation terminated prematurely.")
279
-	}
280 291
 
281
-	# Return results
292
+	## Issue warnings
293
+	result$warnings <- warlist
294
+
295
+	## Return results
282 296
 	return(result)
283 297
 }
284 298
 
... ...
@@ -337,7 +351,7 @@ bivariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.
337 351
 			if (paste0(select,'.gc') %in% names(mcols(binned.data))) {
338 352
 				select <- paste0(select,'.gc')
339 353
 			} else {
340
-				warning("Cannot use GC-corrected reads because they are not in the binned data. Continuing without GC-correction.")
354
+				warning(paste0("ID = ",ID,": Cannot use GC-corrected reads because they are not in the binned data. Continuing without GC-correction."))
341 355
 			}
342 356
 		}
343 357
 		reads <- matrix(c(mcols(models$plus$bins)[,paste0('p',select)], mcols(models$minus$bins)[,paste0('m',select)]), ncol=num.models, dimnames=list(bin=1:num.bins, strand=names(models)))
... ...
@@ -486,12 +500,12 @@ bivariate.findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.
486 500
 	### Check convergence ###
487 501
 	war <- NULL
488 502
 	if (hmm$loglik.delta > eps) {
489
-		war <- warning("HMM did not converge!\n")
503
+		war <- warning(paste0("ID = ",ID,": HMM did not converge!\n"))
490 504
 	}
491 505
 	if (hmm$error == 1) {
492
-		stop("A nan occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your read counts for very high numbers, they could be the cause for this problem.")
506
+		warning(paste0("ID = ",ID,": A NaN occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library! The following factors are known to cause this error: 1) Your read counts contain very high numbers. Try again with a lower value for 'read.cutoff.quantile'. 2) Your library contains too few reads in each bin. 3) Your library contains reads for a different genome than it was aligned to."))
493 507
 	} else if (hmm$error == 2) {
494
-		stop("An error occurred during the Baum-Welch! Parameter estimation terminated prematurely.")
508
+		warning(paste0("ID = ",ID,": An error occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library"))
495 509
 	}
496 510
 
497 511
 	### Make return object ###
... ...
@@ -45,15 +45,19 @@ plot.distribution <- function(model, state=NULL, chrom=NULL, start=NULL, end=NUL
45 45
 	}
46 46
 	if (length(which(selectmask)) != length(model$bins$reads)) {
47 47
 		reads <- model$bins$reads[selectmask]
48
-		states <- model$bins$state[selectmask]
49
-		weights <- rep(NA, 3)
50
-		weights[1] <- length(which(states=="zero-inflation"))
51
-		weights[2] <- length(which(states=="unmodified"))
52
-		weights[3] <- length(which(states=="modified"))
53
-		weights <- weights / length(states)
48
+		if (!is.null(model$bins$state)) {
49
+			states <- model$bins$state[selectmask]
50
+			weights <- rep(NA, 3)
51
+			weights[1] <- length(which(states=="zero-inflation"))
52
+			weights[2] <- length(which(states=="unmodified"))
53
+			weights[3] <- length(which(states=="modified"))
54
+			weights <- weights / length(states)
55
+		}
54 56
 	} else {
55 57
 		reads <- model$bins$reads
56
-		weights <- model$weights
58
+		if (!is.null(model$weights)) {
59
+			weights <- model$weights
60
+		}
57 61
 	}
58 62
 
59 63
 	# Find the x limits
... ...
@@ -64,6 +68,9 @@ plot.distribution <- function(model, state=NULL, chrom=NULL, start=NULL, end=NUL
64 68
 
65 69
 	# Plot the histogram
66 70
 	ggplt <- ggplot(data.frame(reads)) + geom_histogram(aes(x=reads, y=..density..), binwidth=1, color='black', fill='white') + coord_cartesian(xlim=c(0,rightxlim)) + theme_bw() + xlab("read count")
71
+	if (is.null(model$weights)) {
72
+		return(ggplt)
73
+	}
67 74
 
68 75
 	### Add fits to the histogram
69 76
 	c.state.labels <- as.character(levels(model$bins$state))
... ...
@@ -144,7 +151,11 @@ plot.chromosomes.univariate <- function(model, file=NULL) {
144 151
 	## Get some variables
145 152
 	num.chroms <- length(levels(seqnames(gr)))
146 153
 	maxseqlength <- max(seqlengths(gr))
147
-	custom.xlim <- model$distributions['monosomy','mu'] * 10
154
+	if (!is.null(model$distributions)) {
155
+		custom.xlim <- model$distributions['monosomy','mu'] * 10
156
+	} else {
157
+		custom.xlim <- mean(model$bins$reads, trim=0.1) * 5
158
+	}
148 159
 
149 160
 	## Setup page
150 161
 	library(grid)
... ...
@@ -165,15 +176,20 @@ plot.chromosomes.univariate <- function(model, file=NULL) {
165 176
 		# Get the i,j matrix positions of the regions that contain this subplot
166 177
 		matchidx <- as.data.frame(which(layout == i1+ncols, arr.ind = TRUE))
167 178
 
168
-		# Percentage of chromosome in state
169
-		tstate <- table(mcols(grl[[i1]])$state)
170
-		pstate.all <- tstate / sum(tstate)
171
-		pstate <- round(pstate.all*100)[-1]	# without 'nullsomy / unmapped' state
172
-		pstring <- apply(pstate, 1, function(x) { paste0(": ", x, "%") })
173
-		pstring <- paste0(names(pstring), pstring)
174
-		pstring <- paste(pstring[which.max(pstate)], collapse="\n")
175
-		pstring2 <- round(pstate.all*100)[1]	# only 'nullsomy / unmapped'
176
-		pstring2 <- paste0(names(pstring2), ": ", pstring2, "%")
179
+		if (!is.null(grl[[i1]]$state)) {
180
+			# Percentage of chromosome in state
181
+			tstate <- table(mcols(grl[[i1]])$state)
182
+			pstate.all <- tstate / sum(tstate)
183
+			pstate <- round(pstate.all*100)[-1]	# without 'nullsomy / unmapped' state
184
+			pstring <- apply(pstate, 1, function(x) { paste0(": ", x, "%") })
185
+			pstring <- paste0(names(pstring), pstring)
186
+			pstring <- paste(pstring[which.max(pstate)], collapse="\n")
187
+			pstring2 <- round(pstate.all*100)[1]	# only 'nullsomy / unmapped'
188
+			pstring2 <- paste0(names(pstring2), ": ", pstring2, "%")
189
+		} else {
190
+			pstring <- ''
191
+			pstring2 <- ''
192
+		}
177 193
 
178 194
 		# Plot the read counts
179 195
 		dfplot <- as.data.frame(grl[[i1]])
... ...
@@ -194,10 +210,15 @@ plot.chromosomes.univariate <- function(model, file=NULL) {
194 210
       panel.grid.minor=element_blank(),
195 211
       plot.background=element_blank())
196 212
 		ggplt <- ggplot(dfplot, aes(x=start, y=reads))	# data
197
-		ggplt <- ggplt + geom_linerange(aes(ymin=0, ymax=reads, col=state), size=0.2)	# read count
198 213
 		ggplt <- ggplt + geom_rect(ymin=-0.05*custom.xlim-0.1*custom.xlim, ymax=-0.05*custom.xlim, xmin=0, mapping=aes(xmax=max(start)), col='white', fill='gray20')	# chromosome backbone as simple rectangle
199
-		ggplt <- ggplt + geom_point(data=dfplot.points, mapping=aes(x=start, y=reads, col=state), size=5, shape=21)	# outliers
200
-		ggplt <- ggplt + scale_color_manual(values=state.colors, drop=F)	# do not drop levels if not present
214
+		if (!is.null(grl[[i1]]$state)) {
215
+			ggplt <- ggplt + geom_linerange(aes(ymin=0, ymax=reads, col=state), size=0.2)	# read count
216
+			ggplt <- ggplt + geom_point(data=dfplot.points, mapping=aes(x=start, y=reads, col=state), size=5, shape=21)	# outliers
217
+			ggplt <- ggplt + scale_color_manual(values=state.colors, drop=F)	# do not drop levels if not present
218
+		} else {
219
+			ggplt <- ggplt + geom_linerange(aes(ymin=0, ymax=reads), size=0.2, col='gray20')	# read count
220
+			ggplt <- ggplt + geom_point(data=dfplot.points, mapping=aes(x=start, y=reads), size=5, shape=21, col='gray20')	# outliers
221
+		}
201 222
 		ggplt <- ggplt + empty_theme	# no axes whatsoever
202 223
 		ggplt <- ggplt + ylab(paste0(seqnames(grl[[i1]])[1], "\n", pstring, "\n", pstring2))	# chromosome names
203 224
 		ggplt <- ggplt + xlim(0,maxseqlength) + ylim(-0.6*custom.xlim,custom.xlim)	# set x- and y-limits
... ...
@@ -307,7 +328,7 @@ plot.chromosomes.bivariate <- function(model, file=NULL) {
307 328
 # ------------------------------------------------------------
308 329
 # Plot genome overview
309 330
 # ------------------------------------------------------------
310
-plot.genome.overview <- function(hmm.list, file, bp.per.cm=5e7, numCPU=1, chromosome=NULL) {
331
+plot.genome.overview <- function(hmm.list, file, bp.per.cm=5e7, chromosome=NULL) {
311 332
 	
312 333
 	## Function definitions
313 334
 	reformat <- function(x) {
... ...
@@ -325,8 +346,7 @@ plot.genome.overview <- function(hmm.list, file, bp.per.cm=5e7, numCPU=1, chromo
325 346
 	hmm.list <- loadHmmsFromFiles(hmm.list)
326 347
 	
327 348
 	## Load and transform to GRanges
328
-	temp <- hmmList2GRangesList(hmm.list, reduce=FALSE, numCPU=numCPU)
329
-	uni.hmm.grl <- temp$grl
349
+	uni.hmm.grl <- lapply(hmm.list, '[[', 'bins')
330 350
 
331 351
 	## Setup page
332 352
 	library(grid)
... ...
@@ -393,7 +413,7 @@ plot.genome.overview <- function(hmm.list, file, bp.per.cm=5e7, numCPU=1, chromo
393 413
 # ------------------------------------------------------------
394 414
 # Plot genome summary
395 415
 # ------------------------------------------------------------
396
-plot.genome.summary <- function(hmm.list, file='aneufinder_genome_overview', numCPU=1) {
416
+plot.genome.summary <- function(hmm.list, file='aneufinder_genome_overview') {
397 417
 
398 418
 	## Function definitions
399 419
 	reformat <- function(x) {
... ...
@@ -415,9 +435,7 @@ plot.genome.summary <- function(hmm.list, file='aneufinder_genome_overview', num
415 435
 	ymax <- length(hmm.list)
416 436
 
417 437
 	## Load and transform to GRanges
418
-	cat('transforming to GRanges\n')
419
-	temp <- hmmList2GRangesList(hmm.list, reduce=TRUE, numCPU=numCPU)
420
-	uni.hmm.grl <- temp$grl
438
+	uni.hmm.grl <- lapply(hmm.list, '[[', 'segments')
421 439
 
422 440
 	## Process GRL for plotting
423 441
 	flattened_gr <- flatGrl(uni.hmm.grl)
... ...
@@ -501,22 +519,28 @@ plot.genome.summary <- function(hmm.list, file='aneufinder_genome_overview', num
501 519
 # ------------------------------------------------------------
502 520
 # Plot a heatmap of chromosome state for multiple samples
503 521
 # ------------------------------------------------------------
504
-plot.chromosome.heatmap <- function(hmm.list, cluster=TRUE, numCPU=1) {
522
+plot.chromosome.heatmap <- function(hmm.list, cluster=TRUE) {
505 523
 
506 524
 	## Load the files
507 525
 	hmm.list <- loadHmmsFromFiles(hmm.list)
508 526
 	
509 527
 	## Transform to GRanges in reduced representation
510
-	temp <- hmmList2GRangesList(hmm.list, reduce=T, numCPU=numCPU, consensus=F)
511
-	grlred <- temp$grl
528
+	grlred <- GRangesList()
529
+	IDlist <- list()
530
+	for (hmm in hmm.list) {
531
+		if (!is.null(hmm$segments)) {
532
+			grlred[[length(grlred)+1]] <- hmm$segments
533
+			IDlist[[length(IDlist)+1]] <- hmm$ID
534
+		}
535
+	}
512 536
 	
513 537
 	## Find the most frequent state (mfs) for each chromosome and sample
514 538
 	cat("finding most frequent state for each sample and chromosome ...")
515 539
 	grl.per.chrom <- lapply(grlred, function(x) { split(x, seqnames(x)) })
516 540
 	mfs.samples <- list()
517
-	for (i1 in 1:length(hmm.list)) {
518
-		mfs.samples[[hmm.list[[i1]]$ID]] <- lapply(grl.per.chrom[[i1]], function(x) { tab <- aggregate(width(x), by=list(state=x$state), FUN="sum"); tab$state[which.max(tab$x)] })
519
-		attr(mfs.samples[[hmm.list[[i1]]$ID]], "varname") <- 'chromosome'
541
+	for (i1 in 1:length(grlred)) {
542
+		mfs.samples[[IDlist[[i1]]]] <- lapply(grl.per.chrom[[i1]], function(x) { tab <- aggregate(width(x), by=list(state=x$state), FUN="sum"); tab$state[which.max(tab$x)] })
543
+		attr(mfs.samples[[IDlist[[i1]]]], "varname") <- 'chromosome'
520 544
 	}
521 545
 	attr(mfs.samples, "varname") <- 'sample'
522 546
 	cat(" done\n")
... ...
@@ -524,7 +548,7 @@ plot.chromosome.heatmap <- function(hmm.list, cluster=TRUE, numCPU=1) {
524 548
 	## Transform to data.frame
525 549
 	# Long format
526 550
 	df <- reshape2::melt(mfs.samples, value.name='state')
527
-	df$state <- factor(df$state, levels=levels(hmm.list[[1]]$states))
551
+	df$state <- factor(df$state, levels=levels(hmm.list[[1]]$bins$state))
528 552
 	df$sample <- factor(df$sample, levels=unique(df$sample))
529 553
 	df$chromosome <- factor(df$chromosome, levels=unique(df$chromosome))
530 554
 
... ...
@@ -534,7 +558,7 @@ plot.chromosome.heatmap <- function(hmm.list, cluster=TRUE, numCPU=1) {
534 558
 		df.wide <- reshape2::dcast(df, sample ~ chromosome, value.var='state', factorsAsStrings=F)
535 559
 		# Correct strings to factors
536 560
 		for (col in 2:ncol(df.wide)) {
537
-			df.wide[,col] <- factor(df.wide[,col], levels=levels(hmm.list[[1]]$states))
561
+			df.wide[,col] <- factor(df.wide[,col], levels=levels(hmm.list[[1]]$bins$state))
538 562
 		}
539 563
 		# Cluster
540 564
 		hc <- hclust(dist(data.matrix(df.wide[-1])))
... ...
@@ -542,7 +566,7 @@ plot.chromosome.heatmap <- function(hmm.list, cluster=TRUE, numCPU=1) {
542 566
 		mfs.samples.clustered <- mfs.samples[hc$order]
543 567
 		attr(mfs.samples.clustered, "varname") <- 'sample'
544 568
 		df <- reshape2::melt(mfs.samples.clustered, value.name='state')
545
-		df$state <- factor(df$state, levels=levels(hmm.list[[1]]$states))
569
+		df$state <- factor(df$state, levels=levels(hmm.list[[1]]$bins$state))
546 570
 		df$sample <- factor(df$sample, levels=unique(df$sample))
547 571
 		df$chromosome <- factor(df$chromosome, levels=unique(df$chromosome))
548 572
 	}
... ...
@@ -556,17 +580,36 @@ plot.chromosome.heatmap <- function(hmm.list, cluster=TRUE, numCPU=1) {
556 580
 # ------------------------------------------------------------
557 581
 # Plot a clustered heatmap of state calls
558 582
 # ------------------------------------------------------------
559
-plot.genome.heatmap <- function(hmm.list, file=NULL, cluster=TRUE, numCPU=1) {
583
+plot.genome.heatmap <- function(hmm.list, file=NULL, cluster=TRUE) {
560 584
 
561 585
 	## Load the files
562 586
 	hmm.list <- loadHmmsFromFiles(hmm.list)
563 587
 
588
+	## Get segments from list
589
+	grlred <- GRangesList()
590
+	IDlist <- list()
591
+	for (hmm in hmm.list) {
592
+		if (!is.null(hmm$segments)) {
593
+			grlred[[length(grlred)+1]] <- hmm$segments
594
+			IDlist[[length(IDlist)+1]] <- hmm$ID
595
+		}
596
+	}
597
+
598
+	## Clustering
564 599
 	if (cluster) {
565
-		# Transform to GRanges in reduced representation
566
-		temp <- hmmList2GRangesList(hmm.list, reduce=TRUE, numCPU=numCPU, consensus=T)
567
-		grlred <- temp$grl
568
-		consensus <- temp$consensus
569
-		constates <- temp$constates
600
+		cat("making consensus template ...")
601
+		suppressPackageStartupMessages(consensus <- disjoin(unlist(grlred)))
602
+		constates <- matrix(NA, ncol=length(grlred), nrow=length(consensus))
603
+		for (i1 in 1:length(grlred)) {
604
+			grred <- grlred[[i1]]
605
+			splt <- split(grred, mcols(grred)$state)
606
+			mind <- as.matrix(findOverlaps(consensus, splt, select='first'))
607
+			constates[,i1] <- mind
608
+		}
609
+		meanstates <- apply(constates, 1, mean, na.rm=T)
610
+		mcols(consensus)$meanstate <- meanstates
611
+		cat(" done\n")
612
+
570 613
 		# Distance measure
571 614
 		cat("clustering ...")
572 615
 		constates[is.na(constates)] <- 0
... ...
@@ -576,11 +619,8 @@ plot.genome.heatmap <- function(hmm.list, file=NULL, cluster=TRUE, numCPU=1) {
576 619
 		hc <- hclust(dist)
577 620
 		# Reorder samples
578 621
 		grlred <- grlred[hc$order]
622
+		IDlist <- IDlist[hc$order]
579 623
 		cat(" done\n")
580
-	} else {
581
-		# Transform to GRanges in reduced representation
582
-		temp <- hmmList2GRangesList(hmm.list, reduce=TRUE, numCPU=numCPU, consensus=F)
583
-		grlred <- temp$grl
584 624
 	}
585 625
 
586 626
 	cat("transforming coordinates ...")
... ...
@@ -601,7 +641,7 @@ plot.genome.heatmap <- function(hmm.list, file=NULL, cluster=TRUE, numCPU=1) {
601 641
 	# Data
602 642
 	df <- list()
603 643
 	for (i1 in 1:length(grlred)) {
604
-		df[[length(df)+1]] <- data.frame(start=grlred[[i1]]$start.genome, end=grlred[[i1]]$end.genome, seqnames=seqnames(grlred[[i1]]), sample=hmm.list[[i1]]$ID, state=grlred[[i1]]$state)
644
+		df[[length(df)+1]] <- data.frame(start=grlred[[i1]]$start.genome, end=grlred[[i1]]$end.genome, seqnames=seqnames(grlred[[i1]]), sample=IDlist[[i1]], state=grlred[[i1]]$state)
605 645
 	}
606 646
 	df <- do.call(rbind, df)
607 647
 	# Chromosome lines
... ...
@@ -13,7 +13,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
13 13
 	// Define logging level
14 14
 // 	FILE* pFile = fopen("chromStar.log", "w");
15 15
 // 	Output2FILE::Stream() = pFile;
16
- 	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
16
+ 	FILELog::ReportingLevel() = FILELog::FromString("NONE");
17 17
 //  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
18 18
 
19 19
 // 	// Parallelization settings
... ...
@@ -8,7 +8,7 @@
8 8
 
9 9
 // inline std::string NowTime();
10 10
 
11
-enum TLogLevel {logERROR, logWARNING, logINFO, logITERATION, logDEBUG, logDEBUG1, logDEBUG2, logDEBUG3, logDEBUG4};
11
+enum TLogLevel {logNONE, logERROR, logWARNING, logINFO, logITERATION, logDEBUG, logDEBUG1, logDEBUG2, logDEBUG3, logDEBUG4};
12 12
 
13 13
 template <typename T>
14 14
 class Log
... ...
@@ -62,7 +62,7 @@ TLogLevel& Log<T>::ReportingLevel()
62 62
 template <typename T>
63 63
 std::string Log<T>::ToString(TLogLevel level)
64 64
 {
65
-	static const char* const buffer[] = {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"};
65
+	static const char* const buffer[] = {"NONE", "ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"};
66 66
     return buffer[level];
67 67
 }
68 68
 
... ...
@@ -87,6 +87,8 @@ TLogLevel Log<T>::FromString(const std::string& level)
87 87
         return logWARNING;
88 88
     if (level == "ERROR")
89 89
         return logERROR;
90
+		if (level == "NONE")
91
+				return logNONE;
90 92
     Log<T>().Get(logWARNING) << "Unknown logging level '" << level << "'. Using INFO level as default.";
91 93
     return logINFO;
92 94
 }