Browse code

read.cutoff changed, introduced several distributions (not used), windows compatible

chakalakka authored on 20/10/2014 07:10:07
Showing 13 changed files

... ...
@@ -18,7 +18,7 @@ get.reproducibility.one2many,
18 18
 get.reproducibility.many2many,
19 19
 
20 20
 export.hmm2bed,
21
-export.binned2wiggle,
21
+export.hmm2wiggle,
22 22
 binned2GRanges,
23 23
 hmm2GRanges,
24 24
 hmmList2GRangesList,
... ...
@@ -6,7 +6,7 @@ hmmList2GRangesList <- function(hmm.list, reduce=TRUE, numCPU=1, consensus=FALSE
6 6
 	## Transform to GRanges
7 7
 	cat('transforming to GRanges\n')
8 8
 	if (numCPU > 1) {
9
-		library(doParallel)
9
+		suppressMessages( library(doParallel) )
10 10
 		cl <- makeCluster(numCPU)
11 11
 		registerDoParallel(cl)
12 12
 		cfun <- function(...) { GRangesList(...) }
... ...
@@ -28,7 +28,7 @@ hmmList2GRangesList <- function(hmm.list, reduce=TRUE, numCPU=1, consensus=FALSE
28 28
 		}
29 29
 		if (consensus) {
30 30
 			consensus.gr <- disjoin(unlist(hmm.grl))
31
-			constates <- matrix(NA, ncol=length(hmm.grl), nrow=length(hmm.grl[[1]]))
31
+			constates <- matrix(NA, ncol=length(hmm.grl), nrow=length(consensus.gr))
32 32
 			for (i1 in 1:length(hmm.grl)) {
33 33
 				hmm.gr <- hmm.grl[[i1]]
34 34
 				splt <- split(hmm.gr, mcols(hmm.gr)$state)
... ...
@@ -38,8 +38,8 @@ hmmList2GRangesList <- function(hmm.list, reduce=TRUE, numCPU=1, consensus=FALSE
38 38
 		}
39 39
 	}
40 40
 	if (consensus) {
41
-		meanstates <- apply(constates, 1, mean)
42
-		mcols(consensus.gr) <- meanstates
41
+		meanstates <- apply(constates, 1, mean, na.rm=T)
42
+		mcols(consensus.gr)$meanstate <- meanstates
43 43
 		return(list(grl=hmm.grl, consensus=consensus.gr, constates=constates))
44 44
 	} else {
45 45
 		return(hmm.grl)
... ...
@@ -131,8 +131,8 @@ bed2GRanges <- function(bedfile, chrom.length.file, skip=1, binsize=NULL) {
131 131
 
132 132
 	## Inflate every range with bins
133 133
 	if (!is.null(binsize)) {
134
-		inflated.data <- GRangesList()
135 134
 		grl <- split(data, seqnames(data))
135
+		inflated.data <- GRangesList()
136 136
 		for (i1 in 1:length(grl)) {
137 137
 			rgr <- ranges(grl[[i1]])
138 138
 			widths <- width(rgr)
... ...
@@ -156,7 +156,7 @@ bed2GRanges <- function(bedfile, chrom.length.file, skip=1, binsize=NULL) {
156 156
 																								ranges=IRanges(start=infstarts, end=infends),
157 157
 																								strand=Rle(strand('*'), sum(numbins)),
158 158
 																								state=infstates)
159
-			inflated.data[[i1]] <- inflated.chrom
159
+			suppressWarnings( inflated.data[[i1]] <- inflated.chrom )
160 160
 		}
161 161
 		inflated.data <- unlist(inflated.data)
162 162
 		seqlengths(inflated.data) <- as.integer(chrom.lengths[names(seqlengths(data))])
... ...
@@ -1,125 +1,120 @@
1
-# ===============================================
2
-# Write color-coded tracks with univariate states
3
-# ===============================================
4
-export.hmm2bed <- function(uni.hmm.list, file="view_me_in_genome_browser", numCPU=1) {
5
-
6
-	## Intercept user input
7
-	if (check.univariate.modellist(uni.hmm.list)!=0) {
8
-		cat("Loading univariate HMMs from files ...")
9
-		mlist <- NULL
10
-		for (modelfile in uni.hmm.list) {
11
-			mlist[[length(mlist)+1]] <- get(load(modelfile))
12
-		}
13
-		uni.hmm.list <- mlist
14
-		remove(mlist)
15
-		cat(" done\n")
16
-		if (check.univariate.modellist(uni.hmm.list)!=0) stop("argument 'uni.hmm.list' expects a list of univariate hmms or a list of files that contain univariate hmms")
1
+# ====================================
2
+# Write color-coded tracks with states
3
+# ====================================
4
+export.hmm2bed <- function(hmm.list, file="view_me_in_genome_browser", numCPU=1) {
5
+
6
+	## Function definitions
7
+	insertchr <- function(hmm.gr) {
8
+		# Change chromosome names from '1' to 'chr1' if necessary
9
+		mask <- which(!grepl('chr', seqnames(hmm.gr)))
10
+		mcols(hmm.gr)$chromosome <- as.character(seqnames(hmm.gr))
11
+		mcols(hmm.gr)$chromosome[mask] <- sub(pattern='^', replacement='chr', mcols(hmm.gr)$chromosome[mask])
12
+		mcols(hmm.gr)$chromosome <- as.factor(mcols(hmm.gr)$chromosome)
13
+		return(hmm.gr)
17 14
 	}
18 15
 
16
+	## Load models
17
+	hmm.list <- loadHmmsFromFiles(hmm.list)
18
+
19 19
 	## Transform to GRanges
20
-	cat('transforming to GRanges\n')
21
-	if (numCPU > 1) {
22
-		library(doParallel)
23
-		cl <- makeCluster(numCPU)
24
-		registerDoParallel(cl)
25
-		cfun <- function(...) { GRangesList(...) }
26
-		uni.hmm.grl <- foreach (uni.hmm = uni.hmm.list, .packages='aneufinder', .combine='cfun', .multicombine=TRUE) %dopar% {
27
-			hmm2GRanges(uni.hmm, reduce=T)
28
-		}
29
-		consensus.gr <- disjoin(unlist(uni.hmm.grl))
30
-		constates <- foreach (uni.hmm.gr = uni.hmm.grl, .packages='GenomicRanges', .combine='cbind') %dopar% {
31
-			splt <- split(uni.hmm.gr, mcols(uni.hmm.gr)$state)
32
-			mind <- as.matrix(findOverlaps(consensus.gr, splt, select='first'))
33
-		}
34
-		stopCluster(cl)
35
-	} else {
36
-		uni.hmm.grl <- GRangesList()
37
-		for (uni.hmm in uni.hmm.list) {
38
-			uni.hmm.grl[[length(uni.hmm.grl)+1]] <- hmm2GRanges(uni.hmm, reduce=T)
39
-		}
40
-		consensus.gr <- disjoin(unlist(uni.hmm.grl))
41
-		constates <- matrix(NA, ncol=length(uni.hmm.grl), nrow=length(uni.hmm.grl[[1]]))
42
-		for (i1 in 1:length(uni.hmm.grl)) {
43
-			uni.hmm.gr <- uni.hmm.grl[[i1]]
44
-			splt <- split(uni.hmm.gr, mcols(uni.hmm.gr)$state)
45
-			mind <- as.matrix(findOverlaps(consensus.gr, splt, select='first'))
46
-			constates[,i1] <- mind
47
-		}
48
-	}
49
-	meanstates <- apply(constates, 1, mean)
50
-	mcols(consensus.gr) <- meanstates
20
+	temp <- hmmList2GRangesList(hmm.list, reduce=T, numCPU=numCPU, consensus=T)
21
+	consensus.gr <- insertchr(temp$consensus)
22
+	hmm.grl <- lapply(temp$grl, insertchr)
51 23
 
52 24
 	# Variables
53
-	nummod <- length(uni.hmm.list)
25
+	nummod <- length(hmm.list)
26
+	numstates <- hmm.list[[1]]$num.states
54 27
 	file <- paste(file,"bed", sep=".")
55 28
 
56 29
 	# Generate the colors
57
-	colors <- state.colors[state.labels]
30
+	colors <- state.colors[levels(hmm.grl[[1]]$state)]
58 31
 	RGBs <- t(col2rgb(colors))
59 32
 	RGBs <- apply(RGBs,1,paste,collapse=",")
33
+	cf <- colorRamp(colors, space='rgb', interpolate='linear')
34
+	consensusRGBs <- round(cf((mcols(consensus.gr)$meanstate-1)/(numstates-1)), 0)
35
+	itemconsensusRgb <- apply(consensusRGBs,1,paste,collapse=",")
60 36
 
61 37
 	# Write first line to file
62
-	cat("browser hide all\n", file=file)
38
+	cat('writing to file',file,'\n')
39
+# 	cat("browser hide all\n", file=file)
40
+	cat("", file=file)
63 41
 	
64 42
 	### Write every model to file ###
65 43
 	for (imod in 1:nummod) {
66
-		uni.hmm <- uni.hmm.list[[imod]]
67
-		uni.hmm.gr <- uni.hmm.grl[[imod]]
68
-		priority <- 50 + imod
69
-		cat(paste("track name=",uni.hmm$ID," description=\"univariate calls for ",names(uni.hmm.list)[imod],"\" visibility=1 itemRgb=On priority=",priority,"\n", sep=""), file=file, append=TRUE)
70
-
71
-		# Change chromosome names from '1' to 'chr1' if necessary
72
-		mask <- which(!grepl('chr', seqnames(uni.hmm.gr)))
73
-		mcols(uni.hmm.gr)$chromosome <- as.character(seqnames(uni.hmm.gr))
74
-		mcols(uni.hmm.gr)$chromosome[mask] <- sub(pattern='^', replacement='chr', mcols(uni.hmm.gr)$chromosome[mask])
75
-		mcols(uni.hmm.gr)$chromosome <- as.factor(mcols(uni.hmm.gr)$chromosome)
76
-
77
-		# Collapse the calls
78
-		collapsed.calls <- as.data.frame(uni.hmm.gr)[c('chromosome','start','end','state')]
79
-
80
-		# Append to file
44
+		cat('writing hmm',imod,'/',nummod,'\r')
45
+		hmm <- hmm.list[[imod]]
46
+		hmm.gr <- hmm.grl[[imod]]
47
+		priority <- 51 + imod
48
+		cat(paste0("track name=",hmm$ID," state"," description=\"",hmm$ID," state","\" visibility=1 itemRgb=On priority=",priority,"\n"), file=file, append=TRUE)
49
+		collapsed.calls <- as.data.frame(hmm.gr)[c('chromosome','start','end','state')]
81 50
 		itemRgb <- RGBs[as.character(collapsed.calls$state)]
82 51
 		numsegments <- nrow(collapsed.calls)
83 52
 		df <- cbind(collapsed.calls, score=rep(0,numsegments), strand=rep(".",numsegments), thickStart=collapsed.calls$start, thickEnd=collapsed.calls$end, itemRgb=itemRgb)
84 53
 		write.table(format(df, scientific=FALSE), file=file, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE)
85 54
 	}
55
+	cat('\n')
56
+
57
+	### Write consensus model to file ###
58
+	cat('writing mean states to file\n')
59
+	priority <- 50
60
+	cat(paste0("track name=\"","average state","\" description=\"","average state","\" visibility=1 itemRgb=On priority=",priority,"\n"), file=file, append=TRUE)
61
+	collapsed.calls <- as.data.frame(consensus.gr)[c('chromosome','start','end','meanstate')]
62
+	numsegments <- nrow(collapsed.calls)
63
+	df <- cbind(collapsed.calls, score=rep(0,numsegments), strand=rep(".",numsegments), thickStart=collapsed.calls$start, thickEnd=collapsed.calls$end, itemRgb=itemconsensusRgb)
64
+	write.table(format(df, scientific=FALSE), file=file, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE)
86 65
 
87 66
 }
88 67
 
89 68
 
90
-# ====================================
91
-# Write signal tracks from binned data
92
-# ====================================
93
-export.binned2wiggle <- function(binned.data.list, file="view_me_in_genome_browser") {
94
-	
95
-	# Check user input
96
-	if (is.null(names(binned.data.list))) {
97
-		stop("Please name the list entries. The names will be used as track name for the Genome Browser file.")
69
+# =============================
70
+# Write signal tracks from hmms
71
+# =============================
72
+export.hmm2wiggle <- function(hmm.list, file="view_me_in_genome_browser", numCPU=1) {
73
+
74
+	## Function definitions
75
+	insertchr <- function(hmm.gr) {
76
+		# Change chromosome names from '1' to 'chr1' if necessary
77
+		mask <- which(!grepl('chr', seqnames(hmm.gr)))
78
+		mcols(hmm.gr)$chromosome <- as.character(seqnames(hmm.gr))
79
+		mcols(hmm.gr)$chromosome[mask] <- sub(pattern='^', replacement='chr', mcols(hmm.gr)$chromosome[mask])
80
+		mcols(hmm.gr)$chromosome <- as.factor(mcols(hmm.gr)$chromosome)
81
+		return(hmm.gr)
98 82
 	}
99 83
 
84
+	## Load models
85
+	hmm.list <- loadHmmsFromFiles(hmm.list)
86
+
87
+	## Transform to GRanges
88
+	temp <- hmmList2GRangesList(hmm.list, reduce=F, numCPU=numCPU, consensus=T)
89
+	hmm.grl <- temp$grl
90
+	consensus.gr <- insertchr(temp$consensus)
91
+	hmm.grl <- lapply(hmm.grl, insertchr)
92
+
100 93
 	# Variables
101
-	file <- paste(file,"wig", sep=".")
94
+	nummod <- length(hmm.list)
95
+	numstates <- hmm.list[[1]]$num.states
96
+	file <- paste(file,"wiggle", sep=".")
102 97
 
103
-	# Write to file
104
-	cat("browser hide all\n", file=file)
98
+	# Write first line to file
99
+	cat('writing to file',file,'\n')
100
+# 	cat("browser hide all\n", file=file)
101
+	cat("", file=file)
105 102
 	
106
-	# Go through binned.data.list
107
-	for (i1 in 1:length(binned.data.list)) {
108
-		binned.data <- binned.data.list[[i1]]
109
-		names(binned.data) <- binned.data.names
110
-		
111
-		# Write track information
112
-		name <- names(binned.data.list)[i1]
113
-		binsize <- diff(binned.data$start[1:2])
114
-		description <- paste("binned read counts ",binsize,"bp", sep="")
115
-		cat(paste("track type=wiggle_0 name=\"",name,"\" description=\"",description,"\" visibility=full autoScale=on color=90,90,90 maxHeightPixels=100:50:20 graphType=bar priority=",i1,"\n", sep=""), file=file, append=TRUE)
103
+	### Write every model to file ###
104
+	for (imod in 1:nummod) {
105
+		cat('writing hmm',imod,'/',nummod,'\r')
106
+		hmm <- hmm.list[[imod]]
107
+		hmm.gr <- hmm.grl[[imod]]
108
+		priority <- 50 + imod
109
+		binsize <- width(hmm.gr[1])
110
+		cat(paste("track type=wiggle_0 name=\"",hmm$ID," read count","\" description=\"",hmm$ID," read count","\" visibility=full autoScale=on color=90,90,90 maxHeightPixels=100:50:20 graphType=bar priority=",priority,"\n", sep=""), file=file, append=TRUE)
116 111
 		# Write read data
117
-		for (chrom in unique(binned.data$chrom)) {
112
+		for (chrom in unique(hmm.gr$chromosome)) {
118 113
 			cat(paste("fixedStep chrom=",chrom," start=1 step=",binsize," span=",binsize,"\n", sep=""), file=file, append=TRUE)
119
-			write.table(binned.data$reads, file=file, append=TRUE, row.names=FALSE, col.names=FALSE)
114
+			write.table(mcols(hmm.gr[hmm.gr$chromosome==chrom])$reads, file=file, append=TRUE, row.names=FALSE, col.names=FALSE)
120 115
 		}
121 116
 	}
122
-
117
+	cat('\n')
123 118
 }
124 119
 
125 120
 
... ...
@@ -1,7 +1,11 @@
1
-findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, output.if.not.converged=FALSE, filter.reads=TRUE) {
1
+findCNVs <- function(binned.data, ID, eps=0.001, init="random", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, output.if.not.converged=FALSE, read.cutoff.quantile=0.999) {
2 2
 
3 3
 	## Intercept user input
4 4
 	IDcheck <- ID  #trigger error if not defined
5
+	if (class(binned.data) != 'GRanges') {
6
+		binned.data <- get(load(binned.data))
7
+		if (class(binned.data) != 'GRanges') stop("argument 'binned.data' expects a GRanges with meta-column 'reads' or a file that contains such an object")
8
+	}
5 9
 	if (check.positive(eps)!=0) stop("argument 'eps' expects a positive numeric")
6 10
 	if (check.integer(max.time)!=0) stop("argument 'max.time' expects an integer")
7 11
 	if (check.integer(max.iter)!=0) stop("argument 'max.iter' expects an integer")
... ...
@@ -28,28 +32,58 @@ findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
28 32
 	}
29 33
 
30 34
 	# Filter high reads out, makes HMM faster
31
-	read.cutoff <- as.integer(quantile(reads, 0.9999))
32
-	if (filter.reads) {
33
-		mask <- reads > read.cutoff
34
-		reads[mask] <- read.cutoff
35
-		numfiltered <- length(which(mask))
36
-		cat(paste0("Replaced read counts > ",read.cutoff," (99.99% quantile) by ",read.cutoff," in ",numfiltered," bins. Set option 'filter.reads=FALSE' to disable this filtering.\n"))
37
-# 		if (numfiltered > 0) {
38
-# 			warning(paste("There are very high read counts in your data (probably artificial). Replaced read counts > ",read.cutoff," (99.99% quantile) by ",read.cutoff," in ",numfiltered," bins. Set option 'filter.reads=FALSE' to disable this filtering.", sep=""))
39
-# 		}
35
+	read.cutoff <- as.integer(quantile(reads, read.cutoff.quantile))
36
+	mask <- reads > read.cutoff
37
+	reads[mask] <- read.cutoff
38
+	numfiltered <- length(which(mask))
39
+	if (numfiltered > 0) {
40
+		cat(paste0("Replaced read counts > ",read.cutoff," (",names(read.cutoff)," quantile) by ",read.cutoff," in ",numfiltered," bins. Set option 'read.cutoff.quantile=1' to disable this filtering.\n"))
40 41
 	}
41 42
 	
42
-	
43 43
 	## Call univariate in a for loop to enable multiple trials
44 44
 	modellist <- list()
45 45
 	for (i_try in 1:num.trials) {
46 46
 		cat("\n\nTry ",i_try," of ",num.trials," ------------------------------\n")
47
+
48
+		## Initial parameters
49
+		if (init == 'random') {
50
+			A.initial <- matrix(runif(numstates^2), ncol=numstates)
51
+			A.initial <- sweep(A.initial, 1, rowSums(A.initial), "/")			
52
+			proba.initial <- runif(numstates)
53
+			size.initial <- runif(1, min=0, max=1000) * cumsum(ifelse(state.distributions=='dnbinom' | state.distributions=='dbinom', T, F))
54
+			prob.initial <- runif(1) * ifelse(state.distributions=='dnbinom', T, F)
55
+			prob.initial[state.distributions=='dgeom'] <- runif(1)
56
+			lambda.initial <- runif(1, min=0, max=1000) * cumsum(ifelse(state.distributions=='dpois', T, F))
57
+		} else if (init == 'standard') {
58
+			A.initial <- matrix(NA, ncol=numstates, nrow=numstates)
59
+			for (irow in 1:numstates) {
60
+				for (icol in 1:numstates) {
61
+					if (irow==icol) { A.initial[irow,icol] <- 0.9 }
62
+					else { A.initial[irow,icol] <- 0.1/(numstates-1) }
63
+				}
64
+			}
65
+			proba.initial <- rep(1/numstates, numstates)
66
+			mean.initial <- mean(reads[reads>0])/2 * cumsum(ifelse(state.distributions=='dnbinom' | state.distributions=='dbinom', T, F))
67
+			var.initial <- var(reads[reads>0])/2 * cumsum(ifelse(state.distributions=='dnbinom' | state.distributions=='dbinom', T, F))
68
+			size.initial <- rep(0,numstates)
69
+			prob.initial <- rep(0,numstates)
70
+			mask <- state.distributions=='dnbinom'
71
+			size.initial[mask] <- dnbinom.size(mean.initial[mask], var.initial[mask])
72
+			prob.initial[mask] <- dnbinom.prob(mean.initial[mask], var.initial[mask])
73
+			mask <- state.distributions=='dbinom'
74
+			size.initial[mask] <- dbinom.size(mean.initial[mask], var.initial[mask])
75
+			prob.initial[mask] <- dbinom.prob(mean.initial[mask], var.initial[mask])
76
+			prob.initial[state.distributions=='dgeom'] <- 0.9
77
+			lambda.initial <- mean(reads[reads>0])/2 * cumsum(ifelse(state.distributions=='dpois', T, F))
78
+		}
79
+	
47 80
 		hmm <- .C("R_univariate_hmm",
48 81
 			reads = as.integer(reads), # int* O
49 82
 			num.bins = as.integer(numbins), # int* T
50 83
 			num.states = as.integer(numstates), # int* N
51 84
 			size = double(length=numstates), # double* size
52 85
 			prob = double(length=numstates), # double* prob
86
+			lambda = double(length=numstates), # double* lambda
53 87
 			num.iterations = as.integer(max.iter), #  int* maxiter
54 88
 			time.sec = as.integer(max.time), # double* maxtime
55 89
 			loglik.delta = as.double(eps.try), # double* eps
... ...
@@ -60,11 +94,12 @@ findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
60 94
 			weights = double(length=numstates), # double* weights
61 95
 			distr.type = as.integer(state.distributions), # int* distr_type
62 96
 			ini.proc = as.integer(iniproc), # int* iniproc
63
-			size.initial = double(length=numstates), # double* initial_size
64
-			prob.initial = double(length=numstates), # double* initial_prob
65
-			A.initial = double(length=numstates*numstates), # double* initial_A
66
-			proba.initial = double(length=numstates), # double* initial_proba
67
-			use.initial.params = as.logical(0), # bool* use_initial_params
97
+			size.initial = as.vector(size.initial), # double* initial_size
98
+			prob.initial = as.vector(prob.initial), # double* initial_prob
99
+			lambda.initial = as.vector(lambda.initial), # double* initial_lambda
100
+			A.initial = as.vector(A.initial), # double* initial_A
101
+			proba.initial = as.vector(proba.initial), # double* initial_proba
102
+			use.initial.params = as.logical(1), # bool* use_initial_params
68 103
 			num.threads = as.integer(num.threads), # int* num_threads
69 104
 			error = as.integer(0), # int* error (error handling)
70 105
 			read.cutoff = as.integer(read.cutoff) # int* read_cutoff
... ...
@@ -95,6 +130,7 @@ findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
95 130
 			num.states = as.integer(numstates), # int* N
96 131
 			size = double(length=numstates), # double* size
97 132
 			prob = double(length=numstates), # double* prob
133
+			lambda = double(length=numstates), # double* lambda
98 134
 			num.iterations = as.integer(max.iter), #  int* maxiter
99 135
 			time.sec = as.integer(max.time), # double* maxtime
100 136
 			loglik.delta = as.double(eps), # double* eps
... ...
@@ -107,6 +143,7 @@ findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
107 143
 			ini.proc = as.integer(iniproc), # int* iniproc
108 144
 			size.initial = as.vector(hmm$size), # double* initial_size
109 145
 			prob.initial = as.vector(hmm$prob), # double* initial_prob
146
+			lambda.initial = as.vector(hmm$lambda), # double* initial_lambda
110 147
 			A.initial = as.vector(hmm$A), # double* initial_A
111 148
 			proba.initial = as.vector(hmm$proba), # double* initial_proba
112 149
 			use.initial.params = as.logical(1), # bool* use_initial_params
... ...
@@ -117,6 +154,7 @@ findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
117 154
 	}
118 155
 
119 156
 	# Add useful entries
157
+	hmm$read.cutoff <- read.cutoff
120 158
 	hmm$ID <- ID
121 159
 	names(hmm$weights) <- state.labels
122 160
 	hmm$coordinates <- data.frame(as.character(seqnames(binned.data)), start(ranges(binned.data)), end(ranges(binned.data)))
... ...
@@ -125,32 +163,44 @@ findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
125 163
 	class(hmm) <- class.aneufinder.hmm
126 164
 	hmm$states <- factor(state.labels, levels=state.labels)[hmm$states+1]
127 165
 	hmm$eps <- eps
128
-	hmm$A <- matrix(hmm$A, ncol=hmm$num.states, byrow=TRUE)
166
+	hmm$A <- matrix(hmm$A, ncol=hmm$num.states)
129 167
 	rownames(hmm$A) <- state.labels
130 168
 	colnames(hmm$A) <- state.labels
131
-	hmm$distributions <- data.frame(type=state.distributions, size=hmm$size, prob=hmm$prob, mu=fmean(hmm$size,hmm$prob), variance=fvariance(hmm$size,hmm$prob))
132
-	rownames(hmm$distributions) <- state.labels
133
-	# Treat 'null-mixed' separately
134
-	if ('null-mixed' %in% state.labels) {
135
-		hmm$distributions['null-mixed','mu'] <- (1-hmm$distributions['null-mixed','prob'])/hmm$distributions['null-mixed','prob']
136
-		hmm$distributions['null-mixed','variance'] <- hmm$distributions['null-mixed','mu']/hmm$distributions['null-mixed','prob']
137
-		hmm$distributions['null-mixed','size'] <- NA
169
+	hmm$distributions <- data.frame()
170
+	hmm$distributions.initial <- data.frame()
171
+	for (idistr in 1:length(hmm$distr.type)) {
172
+		distr <- levels(state.distributions)[hmm$distr.type[idistr]]
173
+		if (distr == 'dnbinom') {
174
+			hmm$distributions <- rbind(hmm$distributions, data.frame(type=distr, size=hmm$size[idistr], prob=hmm$prob[idistr], lambda=NA, mu=dnbinom.mean(hmm$size[idistr],hmm$prob[idistr]), variance=dnbinom.variance(hmm$size[idistr],hmm$prob[idistr])))
175
+			hmm$distributions.initial <- rbind(hmm$distributions.initial, data.frame(type=distr, size=hmm$size.initial[idistr], prob=hmm$prob.initial[idistr], lambda=NA, mu=dnbinom.mean(hmm$size.initial[idistr],hmm$prob.initial[idistr]), variance=dnbinom.variance(hmm$size.initial[idistr],hmm$prob.initial[idistr])))
176
+		} else if (distr == 'dgeom') {
177
+			hmm$distributions <- rbind(hmm$distributions, data.frame(type=distr, size=NA, prob=hmm$prob[idistr], lambda=NA, mu=dgeom.mean(hmm$prob[idistr]), variance=dgeom.variance(hmm$prob[idistr])))
178
+			hmm$distributions.initial <- rbind(hmm$distributions.initial, data.frame(type=distr, size=NA, prob=hmm$prob.initial[idistr], lambda=NA, mu=dgeom.mean(hmm$prob.initial[idistr]), variance=dgeom.variance(hmm$prob.initial[idistr])))
179
+		} else if (distr == 'delta') {
180
+			hmm$distributions <- rbind(hmm$distributions, data.frame(type=distr, size=NA, prob=NA, lambda=NA, mu=0, variance=0))
181
+			hmm$distributions.initial <- rbind(hmm$distributions.initial, data.frame(type=distr, size=NA, prob=NA, lambda=NA, mu=0, variance=0))
182
+		} else if (distr == 'dpois') {
183
+			hmm$distributions <- rbind(hmm$distributions, data.frame(type=distr, size=NA, prob=NA, lambda=hmm$lambda[idistr], mu=hmm$lambda[idistr], variance=hmm$lambda[idistr]))
184
+			hmm$distributions.initial <- rbind(hmm$distributions.initial, data.frame(type=distr, size=NA, prob=NA, lambda=hmm$lambda.initial[idistr], mu=hmm$lambda.initial[idistr], variance=hmm$lambda.initial[idistr]))
185
+		} else if (distr == 'dbinom') {
186
+			hmm$distributions <- rbind(hmm$distributions, data.frame(type=distr, size=hmm$size[idistr], prob=hmm$prob[idistr], lambda=NA, mu=dbinom.mean(hmm$size[idistr],hmm$prob[idistr]), variance=dbinom.variance(hmm$size[idistr],hmm$prob[idistr])))
187
+			hmm$distributions.initial <- rbind(hmm$distributions.initial, data.frame(type=distr, size=hmm$size.initial[idistr], prob=hmm$prob.initial[idistr], lambda=NA, mu=dbinom.mean(hmm$size.initial[idistr],hmm$prob.initial[idistr]), variance=dbinom.variance(hmm$size.initial[idistr],hmm$prob.initial[idistr])))
188
+		}
138 189
 	}
139
-	hmm$A.initial <- matrix(hmm$A.initial, ncol=hmm$num.states, byrow=TRUE)
190
+	rownames(hmm$distributions) <- state.labels
191
+	rownames(hmm$distributions.initial) <- state.labels
192
+	hmm$A.initial <- matrix(hmm$A.initial, ncol=hmm$num.states)
140 193
 	rownames(hmm$A.initial) <- state.labels
141 194
 	colnames(hmm$A.initial) <- state.labels
142
-	hmm$distributions.initial <- data.frame(type=state.distributions, size=hmm$size.initial, prob=hmm$prob.initial, mu=fmean(hmm$size.initial,hmm$prob.initial), variance=fvariance(hmm$size.initial,hmm$prob.initial))
143
-	rownames(hmm$distributions.initial) <- state.labels
144
-	hmm$distributions.initial['nullsomy',2:5] <- c(0,1,0,0)
145
-	hmm$filter.reads <- filter.reads
146 195
 
147 196
 	# Delete redundant entries
148 197
 	hmm$size <- NULL
149 198
 	hmm$prob <- NULL
199
+	hmm$lambda <- NULL
150 200
 	hmm$size.initial <- NULL
151 201
 	hmm$prob.initial <- NULL
202
+	hmm$lambda.initial <- NULL
152 203
 	hmm$use.initial.params <- NULL
153
-	hmm$read.cutoff <- NULL
154 204
 	hmm$distr.type <- NULL
155 205
 
156 206
 	# Issue warnings
... ...
@@ -160,7 +210,7 @@ findCNVs <- function(binned.data, ID, eps=0.001, init="standard", max.time=-1, m
160 210
 		}
161 211
 	}
162 212
 	if (hmm$error == 1) {
163
-		stop("A nan occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your read counts for very high numbers, they could be the cause for this problem.")
213
+		stop("A nan occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your read counts for very high numbers, they could be the cause for this problem. Try again with a lower value for 'read.cutoff.quantile'.")
164 214
 	} else if (hmm$error == 2) {
165 215
 		stop("An error occurred during the Baum-Welch! Parameter estimation terminated prematurely.")
166 216
 	}
... ...
@@ -1,8 +1,8 @@
1 1
 # =======================================================
2 2
 # Some global variables that can be used in all functions
3 3
 # =======================================================
4
-state.labels <- c("nullsomy","null-mixed","monosomy","disomy","trisomy","tetrasomy","multisomy")
5
-state.distributions <- factor(c('delta','dgeom','dnbinom','dnbinom','dnbinom','dnbinom','dnbinom'), levels=c('delta','dgeom','dnbinom'))
4
+state.labels <- c("nullsomy","monosomy","disomy","trisomy","tetrasomy","multisomy")
5
+state.distributions <- factor(c('delta','dnbinom','dnbinom','dnbinom','dnbinom','dnbinom'), levels=c('delta','dgeom','dnbinom','dpois','dbinom'))
6 6
 coordinate.names <- c("chrom","start","end")
7 7
 binned.data.names <- c(coordinate.names,"reads")
8 8
 class.aneufinder.hmm <- "aneufinder.hmm"
... ...
@@ -13,18 +13,44 @@ get.state.colors <- function() { return(state.colors[state.labels]) }
13 13
 # ============================================================================
14 14
 # Functions for a Negative Binomial to transform (mean,variance)<->(size,prob)
15 15
 # ============================================================================
16
-fsize <- function(mean, variance) {
16
+dnbinom.size <- function(mean, variance) {
17 17
 	return(mean^2 / (variance - mean))
18 18
 }
19 19
 
20
-fprob <- function(mean, variance) {
20
+dnbinom.prob <- function(mean, variance) {
21 21
 	return(mean/variance)
22 22
 }
23 23
 
24
-fmean <- function(size, prob) {
24
+dnbinom.mean <- function(size, prob) {
25 25
 	return(size/prob - size)
26 26
 }
27 27
 
28
-fvariance <- function(size, prob) {
28
+dnbinom.variance <- function(size, prob) {
29 29
 	return( (size - prob*size) / prob^2 )
30 30
 }
31
+
32
+dgeom.mean <- function(prob) {
33
+	return( (1-prob)/prob )
34
+}
35
+
36
+dgeom.variance <- function(prob) {
37
+	return( (1-prob)/prob^2 )
38
+}
39
+
40
+dbinom.size <- function(mean, variance) {
41
+	return( mean^2/(mean-variance) )
42
+}
43
+
44
+dbinom.prob <- function(mean, variance) {
45
+	return( (mean-variance)/mean )
46
+}
47
+
48
+dbinom.mean <- function(size, prob) {
49
+	return( size*prob )
50
+}
51
+
52
+dbinom.variance <- function(size, prob) {
53
+	return( size*prob * (1-prob) )
54
+}
55
+
56
+
... ...
@@ -80,6 +80,15 @@ plot.distribution <- function(model, state=NULL, chrom=NULL, start=NULL, end=NUL
80 80
 		} else if (model$distributions[istate,'type']=='dnbinom') {
81 81
 			# negative binomials
82 82
 			distributions[[length(distributions)+1]] <- weights[istate] * dnbinom(x, model$distributions[istate,'size'], model$distributions[istate,'prob'])
83
+		} else if (model$distributions[istate,'type']=='dpois') {
84
+			# poissons
85
+			distributions[[length(distributions)+1]] <- weights[istate] * dpois(x, model$distributions[istate,'lambda'])
86
+		} else if (model$distributions[istate,'type']=='dbinom') {
87
+			# binomials
88
+			s <- model$distributions[istate,'size']
89
+			p <- model$distributions[istate,'prob']
90
+# 			distributions[[length(distributions)+1]] <- weights[istate] * dbinom(x, model$distributions[istate,'size'], model$distributions[istate,'prob'])	# only defined for integer 'size'
91
+			distributions[[length(distributions)+1]] <- weights[istate] * choose(s,x) * p^x * (1-p)^(s-x)
83 92
 		}
84 93
 	}
85 94
 	distributions <- as.data.frame(distributions)
... ...
@@ -23,6 +23,7 @@ The HMM object is output of the function \code{\link{findCNVs}} and is basically
23 23
 \item{proba.initial}{Initial \option{proba} at the beginning of the Baum-Welch.}
24 24
 \item{num.threads}{Number of threads that were used in the Baum-Welch.}
25 25
 \item{error}{This is 0 unless an error occurred during the Baum-Welch.}
26
+\item{read.cutoff}{The read cutoff as specified by the option \option{read.cutoff.quantile}.}
26 27
 \item{eps}{Convergence threshold for the Baum-Welch.}
27 28
 \item{distributions}{Estimated parameters of the employed distributions. See \code{\link{findCNVs}} for a description of which distribution is used for which state.}
28 29
 \item{distributions.initial}{Distribution parameters at the beginning of the Baum-Welch.}
... ...
@@ -24,13 +24,13 @@ Initialization procedure for the distributions. Currently only 'standard' is imp
24 24
 \item{output.if.not.converged}{
25 25
 If set to FALSE, no output will be given in the case that the Baum-Welch did not converge to the desired \option{eps}. If set to TRUE, a model will be returned even if the Baum-Welch did not converge to the desired \option{eps}.
26 26
 }
27
-\item{filter.reads}{
28
-If set to TRUE, then read counts which are above the 99.99\% quantile of the data are set to this value. A warning is issued in this case. This filtering can increase the speed of the Baum-Welch and is unlikely to affect the results.
27
+\item{read.cutoff.quantile}{
28
+Read counts which are above this quantile are replaced by the corresponding value. This filtering can increase the speed of the Baum-Welch and is unlikely to affect the results. It may also be helpful to specify a lower cutoff if you are getting the error message 'A nan occurred during the Baum-Welch! ...'.
29 29
 }
30 30
 }
31 31
 
32 32
 \details{
33
-The Hidden Markov Model which is used to classify the bins uses 7 states: state 'nullsomy' with a delta function as emission densitiy (only zero read counts), 'null-mixed' with a geometric distribution to account, 'monosomy','disomy','trisomy','tetrasomy' and 'multisomy' with Negative Binomials as emission densities. A Baum-Welch algorithm is employed to estimate the parameters of the distributions. See our paper for a detailed description of the method.
33
+The Hidden Markov Model which is used to classify the bins uses 7 states: state 'nullsomy' with a delta function as emission densitiy (only zero read counts), 'null-mixed' with a geometric distribution (see \code{\link{dgeom}}), 'monosomy','disomy','trisomy','tetrasomy' and 'multisomy' with negative binomials (see \code{\link{dnbinom}}) as emission densities. A Baum-Welch algorithm is employed to estimate the parameters of the distributions. See our paper for a detailed description of the method.
34 34
 TODO: insert paper
35 35
 }
36 36
 \value{
... ...
@@ -1,13 +1,13 @@
1 1
 #include "utility.h"
2 2
 #include "scalehmm.h"
3 3
 #include "loghmm.h"
4
-#include <omp.h> // parallelization options
4
+// #include <omp.h> // parallelization options
5 5
 
6 6
 // ===================================================================================================================================================
7 7
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
8 8
 // ===================================================================================================================================================
9 9
 extern "C" {
10
-void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
10
+void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, double* lambda, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_lambda, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
11 11
 {
12 12
 
13 13
 	// Define logging level
... ...
@@ -16,8 +16,8 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
16 16
  	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
17 17
 //  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
18 18
 
19
-	// Parallelization settings
20
-	omp_set_num_threads(*num_threads);
19
+// 	// Parallelization settings
20
+// 	omp_set_num_threads(*num_threads);
21 21
 
22 22
 	// Print some information
23 23
 	FILE_LOG(logINFO) << "number of states = " << *N;
... ...
@@ -75,43 +75,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
75 75
 	FILE_LOG(logINFO) << "data mean = " << mean << ", data variance = " << variance;		
76 76
 	Rprintf("data mean = %g, data variance = %g\n", mean, variance);		
77 77
 	
78
-	// Go through all states of the hmm and assign the density functions
79
-	// This loop assumes that the negative binomial states come last and are consecutive
80
-	double imean, ivariance;
81
-	for (int i_state=0; i_state<*N; i_state++)
82
-	{
83
-		if (*use_initial_params) {
84
-			FILE_LOG(logINFO) << "Using given parameters for size and prob";
85
-			Rprintf("Using given parameters for size and prob\n");
86
-			imean = (1-initial_prob[i_state])*initial_size[i_state] / initial_prob[i_state];
87
-			ivariance = imean / initial_prob[i_state];
88
-			FILE_LOG(logDEBUG2) << "imean = " << imean;
89
-			FILE_LOG(logDEBUG2) << "ivariance = " << ivariance;
90
-		} else {
91
-
92
-			if (*iniproc == 1)
93
-			{
94
-				// Simple initialization based on data mean, assumed to be the disomic mean
95
-				if (distr_type[i_state] == 1) { }
96
-				else if (distr_type[i_state] == 2) { }
97
-				else if (distr_type[i_state] == 3)
98
-				{
99
-					for (int ii_state=i_state; ii_state<*N; ii_state++)
100
-					{
101
-						imean = mean/2 * (ii_state-i_state+1);
102
-						ivariance = imean * 5;
103
-// 						ivariance = variance/2 * (i_state-1);
104
-						// Calculate r and p from mean and variance
105
-						initial_size[ii_state] = pow(imean,2)/(ivariance-imean);
106
-						initial_prob[ii_state] = imean/ivariance;
107
-					}
108
-					break;
109
-				}
110
-			}
111
-
112
-		}
113
-	}
114
-
78
+	// Create the emission densities and initialize
115 79
 	for (int i_state=0; i_state<*N; i_state++)
116 80
 	{
117 81
 		if (distr_type[i_state] == 1)
... ...
@@ -123,7 +87,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
123 87
 		else if (distr_type[i_state] == 2)
124 88
 		{
125 89
 			FILE_LOG(logDEBUG1) << "Using geometric distribution for state " << i_state;
126
-			Geometric *d = new Geometric(O, *T, 0.9); // delete is done inside ~ScaleHMM()
90
+			Geometric *d = new Geometric(O, *T, initial_prob[i_state]); // delete is done inside ~ScaleHMM()
127 91
 			hmm->densityFunctions.push_back(d);
128 92
 		}
129 93
 		else if (distr_type[i_state] == 3)
... ...
@@ -132,6 +96,18 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
132 96
 			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
133 97
 			hmm->densityFunctions.push_back(d);
134 98
 		}
99
+		else if (distr_type[i_state] == 4)
100
+		{
101
+			FILE_LOG(logDEBUG1) << "Using poisson for state " << i_state;
102
+			Poisson *d = new Poisson(O, *T, initial_lambda[i_state]); // delete is done inside ~ScaleHMM()
103
+			hmm->densityFunctions.push_back(d);
104
+		}
105
+		else if (distr_type[i_state] == 5)
106
+		{
107
+			FILE_LOG(logDEBUG1) << "Using binomial for state " << i_state;
108
+			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
109
+			hmm->densityFunctions.push_back(d);
110
+		}
135 111
 		else
136 112
 		{
137 113
 			FILE_LOG(logWARNING) << "Density not specified, using default negative binomial for state " << i_state;
... ...
@@ -189,7 +165,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
189 165
 		proba[i] = hmm->get_proba(i);
190 166
 		for (int j=0; j<*N; j++)
191 167
 		{
192
-			A[i * (*N) + j] = hmm->get_A(i,j);
168
+			A[i * (*N) + j] = hmm->get_A(j,i);
193 169
 		}
194 170
 	}
195 171
 
... ...
@@ -214,6 +190,17 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
214 190
 			size[i] = 0;
215 191
 			prob[i] = 1;
216 192
 		}
193
+		else if (hmm->densityFunctions[i]->get_name() == POISSON)
194
+		{
195
+			Poisson* d = (Poisson*)(hmm->densityFunctions[i]);
196
+			lambda[i] = d->get_lambda();
197
+		}
198
+		else if (hmm->densityFunctions[i]->get_name() == BINOMIAL) 
199
+		{
200
+			Binomial* d = (Binomial*)(hmm->densityFunctions[i]);
201
+			size[i] = d->get_size();
202
+			prob[i] = d->get_prob();
203
+		}
217 204
 	}
218 205
 	*loglik = hmm->get_logP();
219 206
 	hmm->calc_weights(weights);
... ...
@@ -91,6 +91,214 @@ void Normal::set_stdev(double stdev)
91 91
 }
92 92
 
93 93
 
94
+// ============================================================
95
+// Poisson density
96
+// ============================================================
97
+
98
+// Constructor and Destructor ---------------------------------
99
+Poisson::Poisson(int* observations, int T, double lambda)
100
+{
101
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
102
+	this->obs = observations;
103
+	this->T = T;
104
+	this->lambda = lambda;
105
+	this->lxfactorials = NULL;
106
+	// Precompute the lxfactorials that are used in computing the densities
107
+	if (this->obs != NULL)
108
+	{
109
+		this->max_obs = intMax(observations, T);
110
+		this->lxfactorials = (double*) calloc(max_obs+1, sizeof(double));
111
+		this->lxfactorials[0] = 0.0;	// Not necessary, already 0 because of calloc
112
+		this->lxfactorials[1] = 0.0;
113
+		for (int j=2; j<=max_obs; j++)
114
+		{
115
+			this->lxfactorials[j] = this->lxfactorials[j-1] + log(j);
116
+		}
117
+	}
118
+}
119
+
120
+Poisson::~Poisson()
121
+{
122
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
123
+	if (this->lxfactorials != NULL)
124
+	{
125
+		free(this->lxfactorials);
126
+	}
127
+}
128
+
129
+// Methods ----------------------------------------------------
130
+void Poisson::calc_logdensities(double* logdens)
131
+{
132
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
133
+	double logl = log(this->lambda);
134
+	double l = this->lambda;
135
+	double lxfactorial;
136
+	// Select strategy for computing densities
137
+	if (this->max_obs <= this->T)
138
+	{
139
+		FILE_LOG(logDEBUG2) << "Precomputing densities in " << __func__ << " for every obs[t], because max(O)<=T";
140
+		double logdens_per_read [this->max_obs+1];
141
+		for (int j=0; j<=this->max_obs; j++)
142
+		{
143
+			logdens_per_read[j] = j*logl - l - this->lxfactorials[j];
144
+		}
145
+		for (int t=0; t<this->T; t++)
146
+		{
147
+			logdens[t] = logdens_per_read[(int) this->obs[t]];
148
+			FILE_LOG(logDEBUG4) << "logdens["<<t<<"] = " << logdens[t];
149
+			if (isnan(logdens[t]))
150
+			{
151
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
152
+				FILE_LOG(logERROR) << "logdens["<<t<<"] = "<< logdens[t];
153
+				throw nan_detected;
154
+			}
155
+		}
156
+	}
157
+	else
158
+	{
159
+		FILE_LOG(logDEBUG2) << "Computing densities in " << __func__ << " for every t, because max(O)>T";
160
+		for (int t=0; t<this->T; t++)
161
+		{
162
+			lxfactorial = this->lxfactorials[(int) this->obs[t]];
163
+			logdens[t] = this->obs[t]*logl - l - lxfactorial;
164
+			FILE_LOG(logDEBUG4) << "logdens["<<t<<"] = " << logdens[t];
165
+			if (isnan(logdens[t]))
166
+			{
167
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
168
+				FILE_LOG(logERROR) << "logdens["<<t<<"] = "<< logdens[t];
169
+				throw nan_detected;
170
+			}
171
+		}
172
+	}
173
+} 
174
+
175
+void Poisson::calc_densities(double* dens)
176
+{
177
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
178
+	double logl = log(this->lambda);
179
+	double l = this->lambda;
180
+	double lxfactorial;
181
+	// Select strategy for computing densities
182
+	if (this->max_obs <= this->T)
183
+	{
184
+		FILE_LOG(logDEBUG2) << "Precomputing densities in " << __func__ << " for every obs[t], because max(O)<=T";
185
+		double dens_per_read [this->max_obs+1];
186
+		for (int j=0; j<=this->max_obs; j++)
187
+		{
188
+			dens_per_read[j] = exp( j*logl - l - this->lxfactorials[j] );
189
+		}
190
+		for (int t=0; t<this->T; t++)
191
+		{
192
+			dens[t] = dens_per_read[(int) this->obs[t]];
193
+			FILE_LOG(logDEBUG4) << "dens["<<t<<"] = " << dens[t];
194
+			if (isnan(dens[t]))
195
+			{
196
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
197
+				FILE_LOG(logERROR) << "dens["<<t<<"] = "<< dens[t];
198
+				throw nan_detected;
199
+			}
200
+		}
201
+	}
202
+	else
203
+	{
204
+		FILE_LOG(logDEBUG2) << "Computing densities in " << __func__ << " for every t, because max(O)>T";
205
+		for (int t=0; t<this->T; t++)
206
+		{
207
+			lxfactorial = this->lxfactorials[(int) this->obs[t]];
208
+			dens[t] = exp( this->obs[t]*logl - l - lxfactorial );
209
+			FILE_LOG(logDEBUG4) << "dens["<<t<<"] = " << dens[t];
210
+			if (isnan(dens[t]))
211
+			{
212
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
213
+				FILE_LOG(logERROR) << "dens["<<t<<"] = "<< dens[t];
214
+				throw nan_detected;
215
+			}
216
+		}
217
+	}
218
+} 
219
+
220
+void Poisson::update(double* weights)
221
+{
222
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
223
+	double numerator, denominator;
224
+	// Update lambda
225
+	numerator=denominator=0.0;
226
+// 	clock_t time, dtime;
227
+// 	time = clock();
228
+	for (int t=0; t<this->T; t++)
229
+	{
230
+		numerator += weights[t] * this->obs[t];
231
+		denominator += weights[t];
232
+	}
233
+	this->lambda = numerator/denominator; // Update of size is now done with updated lambda
234
+// 	dtime = clock() - time;
235
+// 	FILE_LOG(logDEBUG1) << "updateL(): "<<dtime<< " clicks";
236
+	FILE_LOG(logDEBUG1) << "l = " << this->lambda;
237
+
238
+}
239
+
240
+void Poisson::update_constrained(double** weights, int fromState, int toState)
241
+{
242
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
243
+	FILE_LOG(logDEBUG1) << "l = "<<this->lambda;
244
+	double numerator, denominator;
245
+	// Update lambda
246
+	numerator=denominator=0.0;
247
+// 	clock_t time, dtime;
248
+// 	time = clock();
249
+	for (int i=0; i<toState-fromState; i++)
250
+	{
251
+		for (int t=0; t<this->T; t++)
252
+		{
253
+			numerator += weights[i+fromState][t] * this->obs[t];
254
+			denominator += weights[i+fromState][t] * (i+1);
255
+		}
256
+	}
257
+	this->lambda = numerator/denominator; // Update of size is now done with old lambda
258
+// 	dtime = clock() - time;
259
+// 	FILE_LOG(logDEBUG1) << "updateL(): "<<dtime<< " clicks";
260
+	FILE_LOG(logDEBUG1) << "l = "<<this->lambda;
261
+
262
+}
263
+
264
+// Getter and Setter ------------------------------------------
265
+double Poisson::get_mean()
266
+{
267
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
268
+	return( this->lambda );
269
+}
270
+
271
+void Poisson::set_mean(double mean)
272
+{
273
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
274
+	this->lambda = mean;
275
+}
276
+
277
+double Poisson::get_variance()
278
+{
279
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
280
+	return( this->lambda );
281
+}
282
+
283
+void Poisson::set_variance(double variance)
284
+{
285
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
286
+	this->lambda = variance;
287
+}
288
+
289
+DensityName Poisson::get_name()
290
+{
291
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
292
+	return(POISSON);
293
+}
294
+
295
+double Poisson::get_lambda()
296
+{
297
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
298
+	return(this->lambda);
299
+}
300
+
301
+
94 302
 // ============================================================
95 303
 // Negative Binomial density
96 304
 // ============================================================
... ...
@@ -226,7 +434,7 @@ void NegativeBinomial::update(double* weights)
226 434
 {
227 435
 	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
228 436
 	double eps = 1e-4, kmax;
229
-	double numerator, denominator, rhere, dr, Fr, dFrdr, DigammaR, DigammaRplusDR;
437
+	double numerator, denominator, size0, dSize, F, dFdSize, DigammaSize, DigammaSizePlusDSize;
230 438
 	// Update prob (p)
231 439
 	numerator=denominator=0.0;
232 440
 // 	clock_t time, dtime;
... ...
@@ -236,86 +444,86 @@ void NegativeBinomial::update(double* weights)
236 444
 		numerator+=weights[t]*this->size;
237 445
 		denominator+=weights[t]*(this->size+this->obs[t]);
238 446
 	}
239
-	this->prob = numerator/denominator; // Update of size (r) is now done with updated prob
447
+	this->prob = numerator/denominator; // Update of size is now done with updated prob
240 448
 	double logp = log(this->prob);
241 449
 // 	dtime = clock() - time;
242 450
 // 	FILE_LOG(logDEBUG1) << "updateP(): "<<dtime<< " clicks";
243
-	// Update of size (r) with Newton Method
244
-	rhere = this->size;
245
-	dr = 0.00001;
451
+	// Update of size with Newton Method
452
+	size0 = this->size;
453
+	dSize = 0.00001;
246 454
 	kmax = 20;
247 455
 // 	time = clock();
248 456
 	// Select strategy for computing digammas
249 457
 	if (this->max_obs <= this->T)
250 458
 	{
251 459
 		FILE_LOG(logDEBUG2) << "Precomputing digammas in " << __func__ << " for every obs[t], because max(O)<=T";
252
-		double DigammaRplusX[this->max_obs+1], DigammaRplusDRplusX[this->max_obs+1];
460
+		double DigammaSizePlusX[this->max_obs+1], DigammaSizePlusDSizePlusX[this->max_obs+1];
253 461
 		for (int k=1; k<kmax; k++)
254 462
 		{
255
-			Fr=dFrdr=0.0;
256
-			DigammaR = digamma(rhere); // boost::math::digamma<>(rhere);
257
-			DigammaRplusDR = digamma(rhere + dr); // boost::math::digamma<>(rhere+dr);
463
+			F=dFdSize=0.0;
464
+			DigammaSize = digamma(size0); // boost::math::digamma<>(size0);
465
+			DigammaSizePlusDSize = digamma(size0 + dSize); // boost::math::digamma<>(size0+dSize);
258 466
 			// Precompute the digammas by iterating over all possible values of the observation vector
259 467
 			for (int j=0; j<=this->max_obs; j++)
260 468
 			{
261
-				DigammaRplusX[j] = digamma(rhere+j);
262
-				DigammaRplusDRplusX[j] = digamma(rhere+dr+j);
469
+				DigammaSizePlusX[j] = digamma(size0+j);
470
+				DigammaSizePlusDSizePlusX[j] = digamma(size0+dSize+j);
263 471
 			}
264 472
 			for(int t=0; t<this->T; t++)
265 473
 			{
266 474
 				if(this->obs[t]==0)
267 475
 				{
268
-					Fr+=weights[t]*logp;
269
-					//dFrdr+=0;
476
+					F += weights[t] * logp;
477
+					//dFdSize+=0;
270 478
 				}
271 479
 				if(this->obs[t]!=0)
272 480
 				{
273
-					Fr+=weights[t]*(logp-DigammaR+DigammaRplusX[(int)obs[t]]);
274
-					dFrdr+=weights[t]/dr*(DigammaR-DigammaRplusDR+DigammaRplusDRplusX[(int)obs[t]]-DigammaRplusX[(int)obs[t]]);
481
+					F += weights[t] * (logp - DigammaSize + DigammaSizePlusX[(int)obs[t]]);
482
+					dFdSize += weights[t]/dSize * (DigammaSize - DigammaSizePlusDSize + DigammaSizePlusDSizePlusX[(int)obs[t]] - DigammaSizePlusX[(int)obs[t]]);
275 483
 				}
276 484
 			}
277
-			if(fabs(Fr)<eps)
485
+			if(fabs(F)<eps)
278 486
 {
279 487
 				break;
280 488
 			}
281
-			if(Fr/dFrdr<rhere) rhere=rhere-Fr/dFrdr;
282
-			if(Fr/dFrdr>rhere) rhere=rhere/2.0;
489
+			if(F/dFdSize<size0) size0=size0-F/dFdSize;
490
+			if(F/dFdSize>size0) size0=size0/2.0;
283 491
 		}
284 492
 	}
285 493
 	else
286 494
 	{
287 495
 		FILE_LOG(logDEBUG2) << "Computing digammas in " << __func__ << " for every t, because max(O)>T";
288
-		double DigammaRplusX, DigammaRplusDRplusX;
496
+		double DigammaSizePlusX, DigammaSizePlusDSizePlusX;
289 497
 		for (int k=1; k<kmax; k++)
290 498
 		{
291
-			Fr = dFrdr = 0.0;
292
-			DigammaR = digamma(rhere); // boost::math::digamma<>(rhere);
293
-			DigammaRplusDR = digamma(rhere + dr); // boost::math::digamma<>(rhere+dr);
499
+			F = dFdSize = 0.0;
500
+			DigammaSize = digamma(size0); // boost::math::digamma<>(size0);
501
+			DigammaSizePlusDSize = digamma(size0 + dSize); // boost::math::digamma<>(size0+dSize);
294 502
 			for(int t=0; t<this->T; t++)
295 503
 			{
296
-				DigammaRplusX = digamma(rhere+this->obs[t]); //boost::math::digamma<>(rhere+this->obs[ti]);
297
-				DigammaRplusDRplusX = digamma(rhere+dr+this->obs[t]); // boost::math::digamma<>(rhere+dr+this->obs[ti]);
504
+				DigammaSizePlusX = digamma(size0+this->obs[t]); //boost::math::digamma<>(size0+this->obs[ti]);
505
+				DigammaSizePlusDSizePlusX = digamma(size0+dSize+this->obs[t]); // boost::math::digamma<>(size0+dSize+this->obs[ti]);
298 506
 				if(this->obs[t]==0)
299 507
 				{
300
-					Fr+=weights[t]*logp;
301
-					//dFrdr+=0;
508
+					F+=weights[t]*logp;
509
+					//dFdSize+=0;
302 510
 				}
303 511
 				if(this->obs[t]!=0)
304 512
 				{
305
-					Fr+=weights[t]*(logp-DigammaR+DigammaRplusX);
306
-					dFrdr+=weights[t]/dr*(DigammaR-DigammaRplusDR+DigammaRplusDRplusX-DigammaRplusX);
513
+					F += weights[t] * (logp - DigammaSize + DigammaSizePlusX);
514
+					dFdSize += weights[t]/dSize * (DigammaSize - DigammaSizePlusDSize + DigammaSizePlusDSizePlusX - DigammaSizePlusX);
307 515
 				}
308 516
 			}
309
-			if(fabs(Fr)<eps)
517
+			if(fabs(F)<eps)
310 518
 			{
311 519
 				break;
312 520
 			}
313
-			if(Fr/dFrdr<rhere) rhere=rhere-Fr/dFrdr;
314
-			if(Fr/dFrdr>rhere) rhere=rhere/2.0;
521
+			if(F/dFdSize<size0) size0=size0-F/dFdSize;
522
+			if(F/dFdSize>size0) size0=size0/2.0;
315 523
 		}
316 524
 	}
317
-	this->size = rhere;
318
-	FILE_LOG(logDEBUG1) << "r = "<<this->size << ", p = "<<this->prob;
525
+	this->size = size0;
526
+	FILE_LOG(logDEBUG1) << "size = "<<this->size << ", prob = "<<this->prob;
319 527
 
320 528
 // 	dtime = clock() - time;
321 529
 // 	FILE_LOG(logDEBUG1) << "updateR(): "<<dtime<< " clicks";
... ...
@@ -325,9 +533,10 @@ void NegativeBinomial::update(double* weights)
325 533
 void NegativeBinomial::update_constrained(double** weights, int fromState, int toState)
326 534
 {
327 535
 	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
328
-	FILE_LOG(logDEBUG1) << "r = "<<this->size << ", p = "<<this->prob;
536
+	FILE_LOG(logDEBUG1) << "size = "<<this->size << ", prob = "<<this->prob;
329 537
 	double eps = 1e-4, kmax;
330
-	double numerator, denominator, rhere, dr, Fr, dFrdr, DigammaR, DigammaRplusDR;
538
+// 	double numerator, denominator, size0, dSize, F, dFdSize, DigammaSize, DigammaSizePlusDSize;
539
+	double numerator, denominator, size0, dSize, F, dFdSize, DigammaSize, TrigammaSize;
331 540
 	double logp = log(this->prob);
332 541
 	// Update prob (p)
333 542
 	numerator=denominator=0.0;
... ...
@@ -341,91 +550,99 @@ void NegativeBinomial::update_constrained(double** weights, int fromState, int t
341 550
 			denominator += weights[i+fromState][t] * (this->size*(i+1) + this->obs[t]);
342 551
 		}
343 552
 	}
344
-	this->prob = numerator/denominator; // Update of size (r) is now done with old prob
553
+	this->prob = numerator/denominator; // Update of size is now done with old prob
345 554
 // 	dtime = clock() - time;
346 555
 // 	FILE_LOG(logDEBUG1) << "updateP(): "<<dtime<< " clicks";
347
-	// Update of size (r) with Newton Method
348
-	rhere = this->size;
349
-	dr = 0.00001;
556
+	// Update of size with Newton Method
557
+	size0 = this->size;
558
+	dSize = 0.00001;
350 559
 	kmax = 20;
351 560
 // 	time = clock();
352 561
 	// Select strategy for computing digammas
353 562
 	if (this->max_obs <= this->T)
354 563
 	{
355 564
 		FILE_LOG(logDEBUG2) << "Precomputing digammas in " << __func__ << " for every obs[t], because max(O)<=T";
356
-		double DigammaRplusX[this->max_obs+1], DigammaRplusDRplusX[this->max_obs+1];
565
+// 		double DigammaSizePlusX[this->max_obs+1], DigammaSizePlusDSizePlusX[this->max_obs+1];
566
+		double DigammaSizePlusX[this->max_obs+1], TrigammaSizePlusX[this->max_obs+1];
357 567
 		for (int k=1; k<kmax; k++)
358 568
 		{
359
-			Fr=dFrdr=0.0;
569
+			F=dFdSize=0.0;
360 570
 			for (int i=0; i<toState-fromState; i++)
361 571
 			{
362
-				DigammaR = digamma((i+1)*rhere); // boost::math::digamma<>(rhere);
363
-				DigammaRplusDR = digamma((i+1)*(rhere + dr)); // boost::math::digamma<>(rhere+dr);
572
+				DigammaSize = digamma((i+1)*size0); // boost::math::digamma<>(size0);
573
+				TrigammaSize = trigamma((i+1)*size0); // boost::math::digamma<>(size0);
574
+// 				DigammaSizePlusDSize = digamma((i+1)*(size0 + dSize)); // boost::math::digamma<>(size0+dSize);
364 575
 				// Precompute the digammas by iterating over all possible values of the observation vector
365 576
 				for (int j=0; j<=this->max_obs; j++)
366 577
 				{
367
-					DigammaRplusX[j] = digamma((i+1)*rhere+j);
368
-					DigammaRplusDRplusX[j] = digamma((i+1)*(rhere+dr)+j);
578
+					DigammaSizePlusX[j] = digamma((i+1)*size0+j);
579
+// 					DigammaSizePlusDSizePlusX[j] = digamma((i+1)*(size0+dSize)+j);
580
+					TrigammaSizePlusX[j] = trigamma((i+1)*size0+j);
369 581
 				}
370 582
 				for(int t=0; t<this->T; t++)
371 583
 				{
372 584
 					if(this->obs[t]==0)
373 585
 					{
374
-						Fr += weights[i+fromState][t] * (i+1) * logp;
375
-						//dFrdr+=0;
586
+						F += weights[i+fromState][t] * (i+1) * logp;
587
+						//dFdSize+=0;
376 588
 					}
377 589
 					if(this->obs[t]!=0)
378 590
 					{
379
-						Fr += weights[i+fromState][t] * (i+1) * (logp - DigammaR + DigammaRplusX[(int)obs[t]]);
380
-						dFrdr += weights[i+fromState][t] / dr * (i+1) * (DigammaR - DigammaRplusDR + DigammaRplusDRplusX[(int)obs[t]] - DigammaRplusX[(int)obs[t]]);
591
+						F += weights[i+fromState][t] * (i+1) * (logp - DigammaSize + DigammaSizePlusX[(int)obs[t]]);
592
+// 						dFdSize += weights[i+fromState][t] / dSize * (i+1) * (DigammaSize - DigammaSizePlusDSize + DigammaSizePlusDSizePlusX[(int)obs[t]] - DigammaSizePlusX[(int)obs[t]]);
593
+						dFdSize += weights[i+fromState][t] * pow((i+1),2) * (-TrigammaSize + TrigammaSizePlusX[(int)obs[t]]);
381 594
 					}
382 595
 				}
383
-				if(fabs(Fr)<eps)
596
+				if(fabs(F)<eps)
384 597
 				{
385 598
 					break;
386 599
 				}
387 600
 			}
388
-			if(Fr/dFrdr<rhere) rhere=rhere-Fr/dFrdr;
389
-			if(Fr/dFrdr>rhere) rhere=rhere/2.0;
601
+			if(F/dFdSize<size0) size0=size0-F/dFdSize;
602
+			if(F/dFdSize>size0) size0=size0/2.0;
390 603
 		}
391 604
 	}
392 605
 	else
393 606
 	{
394 607
 		FILE_LOG(logDEBUG2) << "Computing digammas in " << __func__ << " for every t, because max(O)>T";
395
-		double DigammaRplusX, DigammaRplusDRplusX;
608
+// 		double DigammaSizePlusX, DigammaSizePlusDSizePlusX;
609
+		double DigammaSizePlusX, TrigammaSizePlusX;
396 610
 		for (int k=1; k<kmax; k++)
397 611
 		{
398
-			Fr = dFrdr = 0.0;
612
+			F = dFdSize = 0.0;
399 613
 			for (int i=0; i<toState-fromState; i++)
400 614
 			{
401
-				DigammaR = digamma((i+1)*rhere); // boost::math::digamma<>(rhere);
402
-				DigammaRplusDR = digamma((i+1)*(rhere + dr)); // boost::math::digamma<>(rhere+dr);
615
+				DigammaSize = digamma((i+1)*size0); // boost::math::digamma<>(size0);
616
+				TrigammaSize = trigamma((i+1)*size0); // boost::math::digamma<>(size0);
617
+// 				DigammaSizePlusDSize = digamma((i+1)*(size0 + dSize)); // boost::math::digamma<>(size0+dSize);
403 618
 				for(int t=0; t<this->T; t++)
404 619
 				{
405
-					DigammaRplusX = digamma((i+1)*rhere+this->obs[t]); //boost::math::digamma<>(rhere+this->obs[ti]);
406
-					DigammaRplusDRplusX = digamma((i+1)*(rhere+dr)+this->obs[t]); // boost::math::digamma<>(rhere+dr+this->obs[ti]);
620
+					DigammaSizePlusX = digamma((i+1)*size0+this->obs[t]); //boost::math::digamma<>(size0+this->obs[ti]);
621
+// 					DigammaSizePlusDSizePlusX = digamma((i+1)*(size0+dSize)+this->obs[t]); // boost::math::digamma<>(size0+dSize+this->obs[ti]);
622
+					TrigammaSizePlusX = trigamma((i+1)*size0+this->obs[t]);
407 623
 					if(this->obs[t]==0)
408 624
 					{
409
-						Fr += weights[i+fromState][t] * (i+1) * logp;
410
-						//dFrdr+=0;
625
+						F += weights[i+fromState][t] * (i+1) * logp;
626
+						//dFdSize+=0;
411 627
 					}
412 628
 					if(this->obs[t]!=0)
413 629
 					{
414
-						Fr += weights[i+fromState][t] * (i+1) * (logp - DigammaR + DigammaRplusX);
415
-						dFrdr += weights[i+fromState][t] / dr * (i+1) * (DigammaR - DigammaRplusDR + DigammaRplusDRplusX - DigammaRplusX);
630
+						F += weights[i+fromState][t] * (i+1) * (logp - DigammaSize + DigammaSizePlusX);
631
+// 						dFdSize += weights[i+fromState][t] / dSize * (i+1) * (DigammaSize - DigammaSizePlusDSize + DigammaSizePlusDSizePlusX - DigammaSizePlusX);
632
+						dFdSize += weights[i+fromState][t] * pow((i+1),2) * (-TrigammaSize + TrigammaSizePlusX);
416 633
 					}
417 634
 				}
418 635
 			}
419
-			if(fabs(Fr)<eps)
636
+			if(fabs(F)<eps)
420 637
 			{
421 638
 				break;
422 639
 			}
423
-			if(Fr/dFrdr<rhere) rhere=rhere-Fr/dFrdr;
424
-			if(Fr/dFrdr>rhere) rhere=rhere/2.0;
640
+			if(F/dFdSize<size0) size0=size0-F/dFdSize;
641
+			if(F/dFdSize>size0) size0=size0/2.0;
425 642
 		}
426 643
 	}
427
-	this->size = rhere;
428
-	FILE_LOG(logDEBUG1) << "r = "<<this->size << ", p = "<<this->prob;
644
+	this->size = size0;
645
+	FILE_LOG(logDEBUG1) << "size = "<<this->size << ", prob = "<<this->prob;
429 646
 	this->mean = this->fmean(this->size, this->prob);
430 647
 	this->variance = this->fvariance(this->size, this->prob);
431 648
 
... ...
@@ -506,6 +723,396 @@ double NegativeBinomial::get_prob()
506 723
 }
507 724
 
508 725
 
726
+// ============================================================
727
+//  Binomial density
728
+// ============================================================
729
+
730
+// Constructor and Destructor ---------------------------------
731
+Binomial::Binomial(int* observations, int T, double size, double prob)
732
+{
733
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
734
+	this->obs = observations;
735
+	this->T = T;
736
+	this->size = size;
737
+	this->prob = prob;
738
+}
739
+
740
+Binomial::~Binomial()
741
+{
742
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
743
+}
744
+
745
+// Methods ----------------------------------------------------
746
+void Binomial::calc_logdensities(double* logdens)
747
+{
748
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
749
+	double logp = log(this->prob);
750
+	double log1minusp = log(1-this->prob);
751
+	// Select strategy for computing gammas
752
+	if (this->max_obs <= this->T)
753
+	{
754
+		FILE_LOG(logDEBUG2) << "Precomputing densities in " << __func__ << " for every obs[t], because max(O)<=T";
755
+		double logdens_per_read [this->max_obs+1];
756
+		for (int j=0; j<=this->max_obs; j++)
757
+		{
758
+			logdens_per_read[j] = lchoose(this->size, j) + j * logp + (this->size-j) * log1minusp;
759
+		}
760
+		for (int t=0; t<this->T; t++)
761
+		{
762
+			logdens[t] = logdens_per_read[(int) this->obs[t]];
763
+			FILE_LOG(logDEBUG4) << "logdens["<<t<<"] = " << logdens[t];
764
+			if (isnan(logdens[t]))
765
+			{
766
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
767
+				FILE_LOG(logERROR) << "logdens["<<t<<"] = "<< logdens[t];
768
+				throw nan_detected;
769
+			}
770
+		}
771
+	}
772
+	else
773
+	{
774
+		FILE_LOG(logDEBUG2) << "Computing densities in " << __func__ << " for every t, because max(O)>T";
775
+		int j;
776
+		for (int t=0; t<this->T; t++)
777
+		{
778
+			j = (int) this->obs[t];
779
+			logdens[t] = lchoose(this->size, j) + j * logp + (this->size-j) * log1minusp;
780
+			FILE_LOG(logDEBUG4) << "logdens["<<t<<"] = " << logdens[t];
781
+			if (isnan(logdens[t]))
782
+			{
783
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
784
+				FILE_LOG(logERROR) << "logdens["<<t<<"] = "<< logdens[t];
785
+				throw nan_detected;
786
+			}
787
+		}
788
+	}
789
+} 
790
+
791
+void Binomial::calc_densities(double* dens)
792
+{
793
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
794
+	double logp = log(this->prob);
795
+	double log1minusp = log(1-this->prob);
796
+	// Select strategy for computing gammas
797
+	if (this->max_obs <= this->T)
798
+	{
799
+		FILE_LOG(logDEBUG2) << "Precomputing densities in " << __func__ << " for every obs[t], because max(O)<=T";
800
+		double dens_per_read [this->max_obs+1];
801
+		for (int j=0; j<=this->max_obs; j++)
802
+		{
803
+			dens_per_read[j] = exp( lchoose(this->size, j) + j * logp + (this->size-j) * log1minusp );
804
+		}
805
+		for (int t=0; t<this->T; t++)
806
+		{
807
+			dens[t] = dens_per_read[(int) this->obs[t]];
808
+			FILE_LOG(logDEBUG4) << "dens["<<t<<"] = " << dens[t];
809
+			if (isnan(dens[t]))
810
+			{
811
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
812
+				FILE_LOG(logERROR) << "dens["<<t<<"] = "<< dens[t];
813
+				throw nan_detected;
814
+			}
815
+		}
816
+	}
817
+	else
818
+	{
819
+		FILE_LOG(logDEBUG2) << "Computing densities in " << __func__ << " for every t, because max(O)>T";
820
+		int j;
821
+		for (int t=0; t<this->T; t++)
822
+		{
823
+			j = (int) this->obs[t];
824
+			dens[t] = exp( lchoose(this->size, j) + j * logp + (this->size-j) * log1minusp );
825
+			FILE_LOG(logDEBUG4) << "dens["<<t<<"] = " << dens[t];
826
+			if (isnan(dens[t]))
827
+			{
828
+				FILE_LOG(logERROR) << __PRETTY_FUNCTION__;
829
+				FILE_LOG(logERROR) << "dens["<<t<<"] = "<< dens[t];
830
+				throw nan_detected;
831
+			}
832
+		}
833
+	}
834
+} 
835
+
836
+void Binomial::update(double* weights)
837
+{
838
+	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
839
+	double eps = 1e-4, kmax;
840
+	double numerator, denominator, size0, dSize, F, dFdSize, DigammaSizePlus1, DigammaSizePlusDSizePlus1;
841
+	// Update prob (p)
842
+	numerator=denominator=0.0;
843
+// 	clock_t time, dtime;
844
+// 	time = clock();
845
+	for (int t=0; t<this->T; t++)
846
+	{
847
+		numerator += weights[t] * this->obs[t];
848
+		denominator += weights[t] * this->size;
849
+	}
850
+	this->prob = numerator/denominator; // Update of size is now done with updated prob
851
+	double log1minusp = log(1-this->prob);
852
+// 	dtime = clock() - time;
853
+// 	FILE_LOG(logDEBUG1) << "updateP(): "<<dtime<< " clicks";
854
+	// Update of size with Newton Method
855
+	size0 = this->size;
856
+	dSize = 0.00001;
857
+	kmax = 20;
858
+// 	time = clock();
859
+	// Select strategy for computing digammas
860
+	if (this->max_obs <= this->T)
861
+	{
862
+		FILE_LOG(logDEBUG2) << "Precomputing digammas in " << __func__ << " for every obs[t], because max(O)<=T";
863
+		double DigammaSizeMinusXPlus1[this->max_obs+1], DigammaSizePlusDSizeMinusXPlus1[this->max_obs+1];
864
+		for (int k=1; k<kmax; k++)
865
+		{
866
+			F=dFdSize=0.0;
867
+			DigammaSizePlus1 = digamma(size0+1);
868
+			DigammaSizePlusDSizePlus1 = digamma((size0+dSize)+1);
869
+			// Precompute the digammas by iterating over all possible values of the observation vector
870
+			for (int j=0; j<=this->max_obs; j++)
871
+			{
872
+				DigammaSizeMinusXPlus1[j] = digamma(size0-j+1);
873
+				DigammaSizePlusDSizeMinusXPlus1[j] = digamma((size0+dSize)-j+1);
874
+			}
875
+			for(int t=0; t<this->T; t++)
876
+			{
877
+				if(this->obs[t]==0)
878
+				{
879
+					F += weights[t] * log1minusp;
880
+					//dFdSize+=0;
881
+				}
882
+				if(this->obs[t]!=0)
883
+				{
884
+					F += weights[t] * (DigammaSizePlus1 - DigammaSizeMinusXPlus1[(int)this->obs[t]] + log1minusp);
885
+					dFdSize += weights[t]/dSize * (DigammaSizePlusDSizePlus1-DigammaSizePlus1 - DigammaSizePlusDSizeMinusXPlus1[(int)this->obs[t]]+DigammaSizeMinusXPlus1[(int)this->obs[t]]);
886
+				}
887
+			}
888
+			if(fabs(F)<eps)
889
+{
890
+				break;
891
+			}
892
+			if(F/dFdSize<size0) size0=size0-F/dFdSize;
893
+			if(F/dFdSize>size0) size0=size0/2.0;
894
+		}
895
+	}
896
+	else
897
+	{
898
+		FILE_LOG(logDEBUG2) << "Computing digammas in " << __func__ << " for every t, because max(O)>T";
899
+		double DigammaSizePlusX, DigammaSizePlusDSizePlusX,	DigammaSizeMinusXPlus1, DigammaSizePlusDSizeMinusXPlus1;
900
+		for (int k=1; k<kmax; k++)
901
+		{
902
+			F = dFdSize = 0.0;
903
+			DigammaSizePlus1 = digamma(size0+1);
904
+			DigammaSizePlusDSizePlus1 = digamma((size0+dSize)+1);
905
+			for(int t=0; t<this->T; t++)
906
+			{
907
+				DigammaSizeMinusXPlus1 = digamma(size0-(int)this->obs[t]+1);
908
+				DigammaSizePlusDSizeMinusXPlus1 = digamma((size0+dSize)-(int)this->obs[t]+1);
909
+				if(this->obs[t]==0)
910
+				{
911
+					F += weights[t] * log1minusp;
912
+					//dFdSize+=0;
913
+				}
914
+				if(this->obs[t]!=0)
915
+				{
916
+					F += weights[t] * (DigammaSizePlus1 - DigammaSizeMinusXPlus1 + log1minusp);
917
+					dFdSize += weights[t]/dSize * (DigammaSizePlusDSizePlus1-DigammaSizePlus1 - DigammaSizePlusDSizeMinusXPlus1+DigammaSizeMinusXPlus1);
918
+				}
919
+			}
920
+			if(fabs(F)<eps)
921
+			{
922
+				break;
923
+			}
924
+			if(F/dFdSize<size0) size0=size0-F/dFdSize;
925
+			if(F/dFdSize>size0) size0=size0/2.0;
926
+		}
927
+	}
928
+	this->size = size0;
929
+	FILE_LOG(logDEBUG1) << "r = "<<this->size << ", p = "<<this->prob;
930
+
931
+// 	dtime = clock() - time;
932
+// 	FILE_LOG(logDEBUG1) << "updateR(): "<<dtime<< " clicks";