Browse code

binned.data is now GRanges, reduced memory consumption, posteriors are not returned

chakalakka authored on 09/07/2014 15:24:19
Showing 7 changed files

... ...
@@ -14,7 +14,6 @@ plot.distribution,
14 14
 univariate2bed,
15 15
 binned2wiggle,
16 16
 multivariate2bed,
17
-binned2GRanges,
18 17
 hmm2GRanges,
19 18
 
20 19
 mixedsort,
... ...
@@ -22,6 +21,6 @@ mixedorder
22 21
 )
23 22
 
24 23
 # Import all packages listed as Imports or Depends
25
-# import(
26
-# 	GenomicRanges, IRanges
27
-# )
24
+import(
25
+	GenomicRanges, IRanges
26
+)
... ...
@@ -20,23 +20,25 @@ align2binned <- function(file, format, index=file, chrom.length.file, outputfold
20 20
 		dir.create(outputfolder)
21 21
 	}
22 22
 
23
-	## Read in the data
23
+	### Read in the data
24
+	## BED (0-based)
24 25
 	if (format == "bed") {
25 26
 		cat("Reading file",basename(file),"...")
26
-		# File with chromosome lengths
27
+		# File with chromosome lengths (1-based)
27 28
 		chrom.lengths.df <- read.table(chrom.length.file)
28 29
 		chrom.lengths <- chrom.lengths.df[,2]
29 30
 		names(chrom.lengths) <- chrom.lengths.df[,1]
30
-		# File with reads, determine classes first for faster import
31
+		# File with reads, determine classes first for faster import (0-based)
31 32
 		tab5rows <- read.table(file, nrows=5)
32 33
 		classes.in.bed <- sapply(tab5rows, class)
33 34
 		classes <- rep("NULL",length(classes.in.bed))
34 35
 		classes[1:3] <- classes.in.bed[1:3]
35 36
 		data <- read.table(file, colClasses=classes)
36 37
 		# Convert to GRanges object
37
-		data <- GenomicRanges::GRanges(seqnames=Rle(data[,1]), ranges=IRanges(start=data[,2], end=data[,3]), strand=Rle(strand("*"), nrow(data)))
38
+		data <- GenomicRanges::GRanges(seqnames=Rle(data[,1]), ranges=IRanges(start=data[,2]+1, end=data[,3]+1), strand=Rle(strand("*"), nrow(data)))	# +1 to match coordinate systems
38 39
 		seqlengths(data) <- as.integer(chrom.lengths[names(seqlengths(data))])
39 40
 		chroms.in.data <- seqlevels(data)
41
+	## BAM (1-based)
40 42
 	} else if (format == "bam") {
41 43
 		library(Rsamtools) # TODO: put this in Imports in finished package
42 44
 		library(GenomicAlignments) # TODO: put this in Imports in finished package
... ...
@@ -44,9 +46,10 @@ align2binned <- function(file, format, index=file, chrom.length.file, outputfold
44 46
 		file.header <- Rsamtools::scanBamHeader(file)[[1]]
45 47
 		chrom.lengths <- file.header$targets
46 48
 		chroms.in.data <- names(chrom.lengths)
49
+	## BEDGraph (0-based)
47 50
 	} else if (format == "bedGraph") {
48 51
 		cat("Reading file",basename(file),"...")
49
-		# File with chromosome lengths
52
+		# File with chromosome lengths (1-based)
50 53
 		chrom.lengths.df <- read.table(chrom.length.file)
51 54
 		chrom.lengths <- chrom.lengths.df[,2]
52 55
 		names(chrom.lengths) <- chrom.lengths.df[,1]
... ...
@@ -57,7 +60,7 @@ align2binned <- function(file, format, index=file, chrom.length.file, outputfold
57 60
 		classes[1:4] <- classes.in.bed[1:4]
58 61
 		data <- read.table(file, colClasses=classes)
59 62
 		# Convert to GRanges object
60
-		data <- GenomicRanges::GRanges(seqnames=Rle(data[,1]), ranges=IRanges(start=data[,2], end=data[,3]), strand=Rle(strand("*"), nrow(data)), signal=data[,4])
63
+		data <- GenomicRanges::GRanges(seqnames=Rle(data[,1]), ranges=IRanges(start=data[,2]+1, end=data[,3]+1), strand=Rle(strand("*"), nrow(data)), signal=data[,4])	# +1 to match coordinate systems
61 64
 		seqlengths(data) <- as.integer(chrom.lengths[names(seqlengths(data))])
62 65
 		chroms.in.data <- seqlevels(data)
63 66
 	}
... ...
@@ -69,7 +72,7 @@ align2binned <- function(file, format, index=file, chrom.length.file, outputfold
69 72
 		cat("Binning into binsize",binsize,"\n")
70 73
 
71 74
 		### Iterate over all chromosomes
72
-		binned.data.allchroms <- NULL
75
+		binned.data <- GenomicRanges::GRanges()
73 76
 		if (is.null(chromosomes)) {
74 77
 			chromosomes <- chroms.in.data
75 78
 		}
... ...
@@ -86,38 +89,43 @@ align2binned <- function(file, format, index=file, chrom.length.file, outputfold
86 89
 			## Check last incomplete bin
87 90
 			incomplete.bin <- chrom.lengths[chromosome] %% binsize > 0
88 91
 			if (incomplete.bin) {
89
-				numbins <- ceiling(chrom.lengths[chromosome]/binsize)
92
+				numbins <- floor(chrom.lengths[chromosome]/binsize)	# floor: we don't want incomplete bins, ceiling: we want incomplete bins at the end
90 93
 			} else {
91 94
 				numbins <- chrom.lengths[chromosome]/binsize
92 95
 			}
96
+			if (numbins == 0) {
97
+				warning("chromosome ",chromosome," is smaller than binsize. Skipped.")
98
+				next
99
+			}
93 100
 			## Initialize vectors
94 101
 			cat("initialize vectors...\r")
95 102
 			chroms <- rep(chromosome,numbins)
96 103
 			reads <- rep(0,numbins)
97
-			start <- seq(from=0, by=binsize, length.out=numbins)
98
-			end <- seq(from=binsize-1, by=binsize, length.out=numbins)
99
-			end[length(end)] <- chrom.lengths[chromosome]
104
+			start <- seq(from=1, by=binsize, length.out=numbins)
105
+			end <- seq(from=binsize+1, by=binsize, length.out=numbins)
106
+# 			end[length(end)] <- chrom.lengths[chromosome] # last ending coordinate is size of chromosome, only if incomplete bins are desired
100 107
 
101 108
 			## Create binned chromosome as GRanges object
102 109
 			cat("creating GRanges container...            \r")
103
-			ichrom <- GenomicRanges::GRanges(seqnames = Rle(chromosome, numbins),
110
+			i.binned.data <- GenomicRanges::GRanges(seqnames = Rle(chromosome, numbins),
104 111
 							ranges = IRanges(start=start, end=end),
105 112
 							strand = Rle(strand("*"), numbins)
106 113
 							)
114
+			seqlengths(i.binned.data) <- chrom.lengths[chromosome]
107 115
 
108 116
 			if (format=="bam") {
109 117
 				cat("reading reads from file...               \r")
110
-				data <- GenomicAlignments::readGAlignmentsFromBam(file, index=index, param=ScanBamParam(what=c("pos"),which=range(ichrom)))
118
+				data <- GenomicAlignments::readGAlignmentsFromBam(file, index=index, param=ScanBamParam(what=c("pos"),which=range(i.binned.data)))
111 119
 			}
112 120
 
113 121
 			## Count overlaps
114 122
 			cat("counting overlaps...                     \r")
115 123
 			if (format=="bam" | format=="bed") {
116
-				reads <- GenomicRanges::countOverlaps(ichrom, data[seqnames(data)==chromosome])
124
+				reads <- GenomicRanges::countOverlaps(i.binned.data, data[seqnames(data)==chromosome])
117 125
 			} else if (format=="bedGraph") {
118 126
 				# Take the max value from all regions that fall into / overlap a given bin as read count
119
-				midx <- as.matrix(findOverlaps(ichrom, data[seqnames(data)==chromosome]))
120
-				reads <- rep(0,length(ichrom))
127
+				midx <- as.matrix(findOverlaps(i.binned.data, data[seqnames(data)==chromosome]))
128
+				reads <- rep(0,length(i.binned.data))
121 129
 				signal <- mcols(data)$signal
122 130
 				rle <- rle(midx[,1])
123 131
 				read.idx <- rle$values
... ...
@@ -132,10 +140,10 @@ align2binned <- function(file, format, index=file, chrom.length.file, outputfold
132 140
 			
133 141
 			## Concatenate
134 142
 			cat("concatenate...                           \r")
135
-			binned.data <- data.frame(chroms,start,end,reads)
136
-			names(binned.data) <- binned.data.names
143
+			mcols(i.binned.data)$reads <- reads
137 144
 
138 145
 			if (separate.chroms==TRUE) {
146
+				binned.data <- i.binned.data
139 147
 				if (save.as.RData==TRUE) {
140 148
 					## Print to file
141 149
 					filename <- paste(basename(file),"_binsize_",binsize,"_",chromosome,".RData", sep="")
... ...
@@ -146,23 +154,20 @@ align2binned <- function(file, format, index=file, chrom.length.file, outputfold
146 154
 					return(binned.data)
147 155
 				}
148 156
 			} else {
149
-				binned.data.allchroms[[length(binned.data.allchroms)+1]] <- binned.data
157
+				binned.data <- suppressWarnings(BiocGenerics::append(binned.data, i.binned.data))
150 158
 			}
151 159
 			cat("                                         \r")
152 160
 
153 161
 		}
154
-		if (separate.chroms!=TRUE) {
155
-			cat("Concatenating chromosomes ...")
156
-			binned.data.allchroms <- do.call("rbind",binned.data.allchroms)
157
-			cat(" done\n")
162
+		if (separate.chroms==FALSE) {
158 163
 			if (save.as.RData==TRUE) {
159 164
 				# Print to file
160 165
 				filename <- paste(basename(file),"_binsize_",binsize,".RData", sep="")
161 166
 				cat("Saving to file ...")
162
-				save(binned.data.allchroms, file=file.path(outputfolder,filename) )
167
+				save(binned.data, file=file.path(outputfolder,filename) )
163 168
 				cat(" done\n")
164 169
 			} else {
165
-				return(binned.data.allchroms)
170
+				return(binned.data)
166 171
 			}
167 172
 		}
168 173
 
... ...
@@ -1,19 +1,6 @@
1
-binned2GRanges <- function(binned.data) {
2
-
3
-	library(GenomicRanges)
4
-	gr <- GenomicRanges::GRanges(
5
-			seqnames = Rle(binned.data$chrom),
6
-			ranges = IRanges(start=binned.data$start, end=binned.data$end),
7
-			strand = Rle(strand("*"), nrow(binned.data)),
8
-			reads = binned.data$reads
9
-			)
10
-	return(gr)
11
-
12
-}
13
-
14 1
 hmm2GRanges <- function(hmm, reduce=TRUE) {
15 2
 
16
-	library(GenomicRanges)
3
+# 	library(GenomicRanges)
17 4
 	### Check user input ###
18 5
 	if (check.univariate.model(hmm)!=0) stop("argument 'hmm' expects a univariate hmm object (type ?uni.hmm for help)")
19 6
 	if (check.logical(reduce)!=0) stop("argument 'reduce' expects TRUE or FALSE")
... ...
@@ -25,10 +12,9 @@ hmm2GRanges <- function(hmm, reduce=TRUE) {
25 12
 			ranges = IRanges(start=hmm$coordinates$start, end=hmm$coordinates$end),
26 13
 			strand = Rle(strand("*"), nrow(hmm$coordinates))
27 14
 			)
28
-	# Seqlengths gives warnings because our coordinates are 0-based
29
-# 	for (chrom in unique(hmm$coordinates$chrom)) {
30
-# 		seqlengths(gr)[names(seqlengths(gr))==chrom] <- max(hmm$coordinates$end[hmm$coordinates$chrom==chrom])
31
-# 	}
15
+	seqlengths(gr) <- hmm$seqlengths[names(seqlengths(gr))]
16
+	# Reorder seqlevels
17
+	gr <- GenomicRanges::keepSeqlevels(gr, names(hmm$seqlengths))
32 18
 
33 19
 	if (reduce) {
34 20
 		# Reduce state by state
... ...
@@ -16,25 +16,24 @@ find.aneuploidies <- function(binned.data, use.states=0:3, eps=0.001, init="stan
16 16
 	war <- NULL
17 17
 	if (is.null(eps.try)) eps.try <- eps
18 18
 
19
-	names(binned.data) <- binned.data.names # defined globally outside this function
20
-
21 19
 	## Assign variables
22 20
 # 	state.labels # assigned globally outside this function
23 21
 	use.state.labels <- state.labels[use.states+1]
24 22
 	numstates <- length(use.states)
25
-	numbins <- length(binned.data$reads)
23
+	numbins <- length(binned.data)
24
+	reads <- mcols(binned.data)$reads
26 25
 	iniproc <- which(init==c("standard","random","empiric")) # transform to int
27 26
 
28 27
 	# Check if there are reads in the data, otherwise HMM will blow up
29
-	if (!any(binned.data$reads!=0)) {
28
+	if (!any(reads!=0)) {
30 29
 		stop("All reads in data are zero. No univariate HMM done.")
31 30
 	}
32 31
 
33 32
 	# Filter high reads out, makes HMM faster
34
-	read.cutoff <- as.integer(quantile(binned.data$reads, 0.9999))
33
+	read.cutoff <- as.integer(quantile(reads, 0.9999))
35 34
 	if (filter.reads) {
36
-		mask <- binned.data$reads > read.cutoff
37
-		binned.data$reads[mask] <- read.cutoff
35
+		mask <- reads > read.cutoff
36
+		reads[mask] <- read.cutoff
38 37
 		numfiltered <- length(which(mask))
39 38
 		if (numfiltered > 0) {
40 39
 			warning(paste("There are very high read counts in your data (probably artificial). Replaced read counts > ",read.cutoff," (99.99% quantile) by ",read.cutoff," in ",numfiltered," bins. Set option 'filter.reads=FALSE' to disable this filtering.", sep=""))
... ...
@@ -47,16 +46,16 @@ find.aneuploidies <- function(binned.data, use.states=0:3, eps=0.001, init="stan
47 46
 	for (i_try in 1:num.trials) {
48 47
 		cat("\n\nTry ",i_try," of ",num.trials," ------------------------------\n")
49 48
 		hmm <- .C("R_univariate_hmm",
50
-			reads = as.integer(binned.data$reads), # int* O
49
+			reads = as.integer(reads), # int* O
51 50
 			num.bins = as.integer(numbins), # int* T
52 51
 			num.states = as.integer(numstates), # int* N
53
-			use.states = as.integer(use.states), # int* states
52
+			use.states = as.integer(use.states), # int* statelabels
54 53
 			size = double(length=numstates), # double* size
55 54
 			prob = double(length=numstates), # double* prob
56 55
 			num.iterations = as.integer(max.iter), #  int* maxiter
57 56
 			time.sec = as.integer(max.time), # double* maxtime
58 57
 			loglik.delta = as.double(eps.try), # double* eps
59
-			posteriors = double(length=numbins * numstates), # double* posteriors
58
+			states = integer(length=numbins), # int* states
60 59
 			A = double(length=numstates*numstates), # double* A
61 60
 			proba = double(length=numstates), # double* proba
62 61
 			loglik = double(length=1), # double* loglik
... ...
@@ -89,7 +88,6 @@ find.aneuploidies <- function(binned.data, use.states=0:3, eps=0.001, init="stan
89 88
 				warning("HMM did not converge in trial run ",i_try,"!\n")
90 89
 			}
91 90
 			# Store model in list
92
-			hmm$posteriors <- NULL
93 91
 			hmm$reads <- NULL
94 92
 			modellist[[i_try]] <- hmm
95 93
 		}
... ...
@@ -104,16 +102,16 @@ find.aneuploidies <- function(binned.data, use.states=0:3, eps=0.001, init="stan
104 102
 		# Rerun the HMM with different epsilon and initial parameters from trial run
105 103
 		cat("\n\nRerunning try ",indexmax," with eps =",eps,"--------------------\n")
106 104
 		hmm <- .C("R_univariate_hmm",
107
-			reads = as.integer(binned.data$reads), # int* O
105
+			reads = as.integer(reads), # int* O
108 106
 			num.bins = as.integer(numbins), # int* T
109 107
 			num.states = as.integer(numstates), # int* N
110
-			use.states = as.integer(use.states), # int* states
108
+			use.states = as.integer(use.states), # int* statelabels
111 109
 			size = double(length=numstates), # double* size
112 110
 			prob = double(length=numstates), # double* prob
113 111
 			num.iterations = as.integer(max.iter), #  int* maxiter
114 112
 			time.sec = as.integer(max.time), # double* maxtime
115 113
 			loglik.delta = as.double(eps), # double* eps
116
-			posteriors = double(length=numbins * numstates), # double* posteriors
114
+			states = integer(length=numbins), # int* states
117 115
 			A = double(length=numstates*numstates), # double* A
118 116
 			proba = double(length=numstates), # double* proba
119 117
 			loglik = double(length=1), # double* loglik
... ...
@@ -132,11 +130,11 @@ find.aneuploidies <- function(binned.data, use.states=0:3, eps=0.001, init="stan
132 130
 
133 131
 	# Add useful entries
134 132
 	names(hmm$weights) <- use.state.labels
135
-	hmm$coordinates <- binned.data[,coordinate.names]
136
-	hmm$posteriors <- matrix(hmm$posteriors, ncol=hmm$num.states)
137
-	colnames(hmm$posteriors) <- paste("P(",use.state.labels,")", sep="")
133
+	hmm$coordinates <- data.frame(as.character(seqnames(binned.data)), start(ranges(binned.data)), end(ranges(binned.data)))
134
+	names(hmm$coordinates) <- coordinate.names
135
+	hmm$seqlengths <- seqlengths(binned.data)
138 136
 	class(hmm) <- class.aneufinder.univariate
139
-	hmm$states <- use.state.labels[apply(hmm$posteriors, 1, which.max)]
137
+	hmm$states <- use.state.labels[hmm$states+1]
140 138
 	hmm$eps <- eps
141 139
 	hmm$A <- matrix(hmm$A, ncol=hmm$num.states, byrow=TRUE)
142 140
 	rownames(hmm$A) <- use.state.labels
... ...
@@ -6,7 +6,7 @@
6 6
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
7 7
 // ===================================================================================================================================================
8 8
 extern "C" {
9
-void R_univariate_hmm(int* O, int* T, int* N, int* states, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* posteriors, double* A, double* proba, double* loglik, double* weights, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
9
+void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
10 10
 {
11 11
 
12 12
 	// Define logging level
... ...
@@ -89,8 +89,8 @@ void R_univariate_hmm(int* O, int* T, int* N, int* states, double* size, double*
89 89
 			if (*iniproc == 1)
90 90
 			{
91 91
 				// Simple initialization based on data mean, assumed to be the disomic mean
92
-				imean = mean/2 * states[i_state];
93
-				ivariance = variance/2 * states[i_state];
92
+				imean = mean/2 * statelabels[i_state];
93
+				ivariance = variance/2 * statelabels[i_state];
94 94
 			}
95 95
 
96 96
 			// Calculate r and p from mean and variance
... ...
@@ -137,15 +137,28 @@ void R_univariate_hmm(int* O, int* T, int* N, int* states, double* size, double*
137 137
 	}
138 138
 		
139 139
 	FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
140
-	// Compute the posteriors and save results directly to the R pointer
141
-	FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
142
-	#pragma omp parallel for
143
-	for (int iN=0; iN<*N; iN++)
140
+
141
+// 	// Compute the posteriors and save results directly to the R pointer
142
+// 	FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
143
+// 	#pragma omp parallel for
144
+// 	for (int iN=0; iN<*N; iN++)
145
+// 	{
146
+// 		for (int t=0; t<*T; t++)
147
+// 		{
148
+// 			posteriors[t + iN * (*T)] = hmm->get_posterior(iN, t);
149
+// 		}
150
+// 	}
151
+
152
+	// Compute the states from posteriors
153
+	FILE_LOG(logDEBUG1) << "Computing states from posteriors";
154
+	double posterior_per_t [*N];
155
+	for (int t=0; t<*T; t++)
144 156
 	{
145
-		for (int t=0; t<*T; t++)
157
+		for (int iN=0; iN<*N; iN++)
146 158
 		{
147
-			posteriors[t + iN * (*T)] = hmm->get_posterior(iN, t);
159
+			posterior_per_t[iN] = hmm->get_posterior(iN, t);
148 160
 		}
161
+		states[t] = statelabels[argMax(posterior_per_t, *N)];
149 162
 	}
150 163
 
151 164
 	FILE_LOG(logDEBUG1) << "Return parameters";
... ...
@@ -17,7 +17,7 @@ ScaleHMM::ScaleHMM(int T, int N)
17 17
 	this->scalealpha = allocDoubleMatrix(T, N);
18 18
 	this->scalebeta = allocDoubleMatrix(T, N);
19 19
 	this->densities = allocDoubleMatrix(N, T);
20
-	this->tdensities = allocDoubleMatrix(T, N);
20
+// 	this->tdensities = allocDoubleMatrix(T, N);
21 21
 	this->proba = (double*) calloc(N, sizeof(double));
22 22
 	this->gamma = allocDoubleMatrix(N, T);
23 23
 	this->sumgamma = (double*) calloc(N, sizeof(double));
... ...
@@ -26,7 +26,7 @@ ScaleHMM::ScaleHMM(int T, int N)
26 26
 	this->dlogP = INFINITY;
27 27
 	this->sumdiff_state_last = 0;
28 28
 	this->sumdiff_posterior = 0.0;
29
-	this->use_tdens = false;
29
+// 	this->use_tdens = false;
30 30
 
31 31
 }
32 32
 
... ...
@@ -38,7 +38,7 @@ ScaleHMM::~ScaleHMM()
38 38
 	freeDoubleMatrix(this->scalealpha, this->T);
39 39
 	freeDoubleMatrix(this->scalebeta, this->T);
40 40
 	freeDoubleMatrix(this->densities, this->N);
41
-	freeDoubleMatrix(this->tdensities, this->T);
41
+// 	freeDoubleMatrix(this->tdensities, this->T);
42 42
 	freeDoubleMatrix(this->gamma, this->N);
43 43
 	freeDoubleMatrix(this->sumxi, this->N);
44 44
 	free(this->proba);
... ...
@@ -213,7 +213,7 @@ void ScaleHMM::baumWelch(int* maxiter, int* maxtime, double* eps)
213 213
 		this->print_uni_iteration(iteration);
214 214
 
215 215
 		// Check convergence
216
-		if(fabs(this->dlogP) < *eps) //it has converged
216
+		if(this->dlogP < *eps) //it has converged
217 217
 		{
218 218
 			FILE_LOG(logINFO) << "\nConvergence reached!\n";
219 219
 			Rprintf("\nConvergence reached!\n\n");
... ...
@@ -371,8 +371,8 @@ void ScaleHMM::forward()
371 371
 	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
372 372
 	clock_t time = clock(), dtime;
373 373
 
374
-	if (not this->use_tdens)
375
-	{
374
+// 	if (not this->use_tdens)
375
+// 	{
376 376
 
377 377
 		double alpha [this->N];
378 378
 		// Initialization
... ...
@@ -424,61 +424,61 @@ void ScaleHMM::forward()
424 424
 			}
425 425
 		}
426 426
 
427
-	}
428
-	else if (this->use_tdens)
429
-	{
430
-
431
-		double alpha [this->N];
432
-		// Initialization
433
-		this->scalefactoralpha[0] = 0.0;
434
-		for (int iN=0; iN<this->N; iN++)
435
-		{
436
-			alpha[iN] = this->proba[iN] * this->tdensities[0][iN];
437
-			FILE_LOG(logDEBUG4) << "alpha["<<iN<<"] = " << alpha[iN];
438
-			this->scalefactoralpha[0] += alpha[iN];
439
-		}
440
-		FILE_LOG(logDEBUG4) << "scalefactoralpha["<<0<<"] = " << scalefactoralpha[0];
441
-		for (int iN=0; iN<this->N; iN++)
442
-		{
443
-			this->scalealpha[0][iN] = alpha[iN] / this->scalefactoralpha[0];
444
-			FILE_LOG(logDEBUG4) << "scalealpha["<<0<<"]["<<iN<<"] = " << scalealpha[0][iN];
445
-		}
446
-		// Induction
447
-		for (int t=1; t<this->T; t++)
448
-		{
449
-			this->scalefactoralpha[t] = 0.0;
450
-			for (int iN=0; iN<this->N; iN++)
451
-			{
452
-				double helpsum = 0.0;
453
-				for (int jN=0; jN<this->N; jN++)
454
-				{
455
-					helpsum += this->scalealpha[t-1][jN] * this->A[jN][iN];
456
-				}
457
-				alpha[iN] = helpsum * this->tdensities[t][iN];
458
-				FILE_LOG(logDEBUG4) << "alpha["<<iN<<"] = " << alpha[iN];
459
-				this->scalefactoralpha[t] += alpha[iN];
460
-			}
461
-			FILE_LOG(logDEBUG4) << "scalefactoralpha["<<t<<"] = " << scalefactoralpha[t];
462
-			for (int iN=0; iN<this->N; iN++)
463
-			{
464
-				this->scalealpha[t][iN] = alpha[iN] / this->scalefactoralpha[t];
465
-				FILE_LOG(logDEBUG4) << "scalealpha["<<t<<"]["<<iN<<"] = " << scalealpha[t][iN];
466
-				if(isnan(this->scalealpha[t][iN]))
467
-				{
468
-					FILE_LOG(logWARNING) << __PRETTY_FUNCTION__;
469
-					for (int jN=0; jN<this->N; jN++)
470
-					{
471
-						FILE_LOG(logWARNING) << "scalealpha["<<t-1<<"]["<<jN<<"] = " << scalealpha[t-1][jN];
472
-						FILE_LOG(logWARNING) << "A["<<iN<<"]["<<jN<<"] = " << A[iN][jN];
473
-					}
474
-					FILE_LOG(logWARNING) << "scalefactoralpha["<<t<<"] = "<<scalefactoralpha[t] << ", tdensities = "<<tdensities[t][iN];
475
-					FILE_LOG(logWARNING) << "scalealpha["<<t<<"]["<<iN<<"] = " << scalealpha[t][iN];
476
-					exit(1);
477
-				}
478
-			}
479
-		}
480
-
481
-	}
427
+// 	}
428
+// 	else if (this->use_tdens)
429
+// 	{
430
+// 
431
+// 		double alpha [this->N];
432
+// 		// Initialization
433
+// 		this->scalefactoralpha[0] = 0.0;
434
+// 		for (int iN=0; iN<this->N; iN++)
435
+// 		{
436
+// 			alpha[iN] = this->proba[iN] * this->tdensities[0][iN];
437
+// 			FILE_LOG(logDEBUG4) << "alpha["<<iN<<"] = " << alpha[iN];
438
+// 			this->scalefactoralpha[0] += alpha[iN];
439
+// 		}
440
+// 		FILE_LOG(logDEBUG4) << "scalefactoralpha["<<0<<"] = " << scalefactoralpha[0];
441
+// 		for (int iN=0; iN<this->N; iN++)
442
+// 		{
443
+// 			this->scalealpha[0][iN] = alpha[iN] / this->scalefactoralpha[0];
444
+// 			FILE_LOG(logDEBUG4) << "scalealpha["<<0<<"]["<<iN<<"] = " << scalealpha[0][iN];
445
+// 		}
446
+// 		// Induction
447
+// 		for (int t=1; t<this->T; t++)
448
+// 		{
449
+// 			this->scalefactoralpha[t] = 0.0;
450
+// 			for (int iN=0; iN<this->N; iN++)
451
+// 			{
452
+// 				double helpsum = 0.0;
453
+// 				for (int jN=0; jN<this->N; jN++)
454
+// 				{
455
+// 					helpsum += this->scalealpha[t-1][jN] * this->A[jN][iN];
456
+// 				}
457
+// 				alpha[iN] = helpsum * this->tdensities[t][iN];
458
+// 				FILE_LOG(logDEBUG4) << "alpha["<<iN<<"] = " << alpha[iN];
459
+// 				this->scalefactoralpha[t] += alpha[iN];
460
+// 			}
461
+// 			FILE_LOG(logDEBUG4) << "scalefactoralpha["<<t<<"] = " << scalefactoralpha[t];
462
+// 			for (int iN=0; iN<this->N; iN++)
463
+// 			{
464
+// 				this->scalealpha[t][iN] = alpha[iN] / this->scalefactoralpha[t];
465
+// 				FILE_LOG(logDEBUG4) << "scalealpha["<<t<<"]["<<iN<<"] = " << scalealpha[t][iN];
466
+// 				if(isnan(this->scalealpha[t][iN]))
467
+// 				{
468
+// 					FILE_LOG(logWARNING) << __PRETTY_FUNCTION__;
469
+// 					for (int jN=0; jN<this->N; jN++)
470
+// 					{
471
+// 						FILE_LOG(logWARNING) << "scalealpha["<<t-1<<"]["<<jN<<"] = " << scalealpha[t-1][jN];
472
+// 						FILE_LOG(logWARNING) << "A["<<iN<<"]["<<jN<<"] = " << A[iN][jN];
473
+// 					}
474
+// 					FILE_LOG(logWARNING) << "scalefactoralpha["<<t<<"] = "<<scalefactoralpha[t] << ", tdensities = "<<tdensities[t][iN];
475
+// 					FILE_LOG(logWARNING) << "scalealpha["<<t<<"]["<<iN<<"] = " << scalealpha[t][iN];
476
+// 					exit(1);
477
+// 				}
478
+// 			}
479
+// 		}
480
+// 
481
+// 	}
482 482
 
483 483
 	dtime = clock() - time;
484 484
 	FILE_LOG(logDEBUG) << "forward(): " << dtime << " clicks";
... ...
@@ -489,8 +489,8 @@ void ScaleHMM::backward()
489 489
 	FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__;
490 490
 	clock_t time = clock(), dtime;
491 491
 
492
-	if (not this->use_tdens)
493
-	{
492
+// 	if (not this->use_tdens)
493
+// 	{
494 494
 
495 495
 		double beta [this->N];
496 496
 		// Initialization
... ...
@@ -538,57 +538,57 @@ void ScaleHMM::backward()
538 538
 			}
539 539
 		}
540 540
 
541
-	}
542
-	else if (this->use_tdens)
543
-	{
544
-
545
-		double beta [this->N];
546
-		// Initialization
547
-		for (int iN=0; iN<this->N; iN++)
548
-		{
549
-			beta[iN] = 1.0;
550
-			FILE_LOG(logDEBUG4) << "beta["<<iN<<"] = " << beta[iN];
551
-		}
552
-		FILE_LOG(logDEBUG4) << "scalefactoralpha["<<T-1<<"] = " << scalefactoralpha[T-1];
553
-		for (int iN=0; iN<this->N; iN++)
554
-		{
555
-			this->scalebeta[T-1][iN] = beta[iN] / this->scalefactoralpha[T-1];
556
-			FILE_LOG(logDEBUG4) << "scalebeta["<<T-1<<"]["<<iN<<"] = " << scalebeta[T-1][iN];
557
-		}
558
-		// Induction
559
-		for (int t=this->T-2; t>=0; t--)
560
-		{
561
-			for (int iN=0; iN<this->N; iN++)
562
-			{
563
-				FILE_LOG(logDEBUG4) << "Calculating backward variable for state " << iN;
564
-				beta[iN] = 0.0;
565
-				for(int jN=0; jN<this->N; jN++)
566
-				{
567
-					beta[iN] += this->A[iN][jN] * this->tdensities[t+1][jN] * this->scalebeta[t+1][jN];
568
-				}
569
-			}
570
-			FILE_LOG(logDEBUG4) << "scalefactoralpha["<<t<<"] = " << scalefactoralpha[t];
571
-			for (int iN=0; iN<this->N; iN++)
572
-			{
573
-				this->scalebeta[t][iN] = beta[iN] / this->scalefactoralpha[t];
574
-				FILE_LOG(logDEBUG4) << "scalebeta["<<t<<"]["<<iN<<"] = " << scalebeta[t][iN];
575
-				if (isnan(this->scalebeta[t][iN]))
576
-				{
577
-					FILE_LOG(logWARNING) << __PRETTY_FUNCTION__;
578
-					for (int jN=0; jN<this->N; jN++)
579
-					{
580
-						FILE_LOG(logWARNING) << "scalebeta["<<jN<<"]["<<t+1<<"] = " << scalebeta[t+1][jN];
581
-						FILE_LOG(logWARNING) << "A["<<iN<<"]["<<jN<<"] = " << A[iN][jN];
582
-						FILE_LOG(logWARNING) << "tdensities["<<t+1<<"]["<<jN<<"] = " << tdensities[t+1][jN];
583
-					}
584
-					FILE_LOG(logWARNING) << "this->scalefactoralpha[t]["<<t<<"] = "<<this->scalefactoralpha[t] << ", tdensities = "<<densities[t][iN];
585
-					FILE_LOG(logWARNING) << "scalebeta["<<iN<<"]["<<t<<"] = " << scalebeta[t][iN];
586
-					exit(1);
587
-				}
588
-			}
589
-		}
590
-
591
-	}
541
+// 	}
542
+// 	else if (this->use_tdens)
543
+// 	{
544
+// 
545
+// 		double beta [this->N];
546
+// 		// Initialization
547
+// 		for (int iN=0; iN<this->N; iN++)
548
+// 		{
549
+// 			beta[iN] = 1.0;
550
+// 			FILE_LOG(logDEBUG4) << "beta["<<iN<<"] = " << beta[iN];
551
+// 		}
552
+// 		FILE_LOG(logDEBUG4) << "scalefactoralpha["<<T-1<<"] = " << scalefactoralpha[T-1];
553
+// 		for (int iN=0; iN<this->N; iN++)
554
+// 		{
555
+// 			this->scalebeta[T-1][iN] = beta[iN] / this->scalefactoralpha[T-1];
556
+// 			FILE_LOG(logDEBUG4) << "scalebeta["<<T-1<<"]["<<iN<<"] = " << scalebeta[T-1][iN];
557
+// 		}
558
+// 		// Induction
559
+// 		for (int t=this->T-2; t>=0; t--)
560
+// 		{
561
+// 			for (int iN=0; iN<this->N; iN++)
562
+// 			{
563
+// 				FILE_LOG(logDEBUG4) << "Calculating backward variable for state " << iN;
564
+// 				beta[iN] = 0.0;
565
+// 				for(int jN=0; jN<this->N; jN++)
566
+// 				{
567
+// 					beta[iN] += this->A[iN][jN] * this->tdensities[t+1][jN] * this->scalebeta[t+1][jN];
568
+// 				}
569
+// 			}
570
+// 			FILE_LOG(logDEBUG4) << "scalefactoralpha["<<t<<"] = " << scalefactoralpha[t];
571
+// 			for (int iN=0; iN<this->N; iN++)
572
+// 			{
573
+// 				this->scalebeta[t][iN] = beta[iN] / this->scalefactoralpha[t];
574
+// 				FILE_LOG(logDEBUG4) << "scalebeta["<<t<<"]["<<iN<<"] = " << scalebeta[t][iN];
575
+// 				if (isnan(this->scalebeta[t][iN]))
576
+// 				{
577
+// 					FILE_LOG(logWARNING) << __PRETTY_FUNCTION__;
578
+// 					for (int jN=0; jN<this->N; jN++)
579
+// 					{
580
+// 						FILE_LOG(logWARNING) << "scalebeta["<<jN<<"]["<<t+1<<"] = " << scalebeta[t+1][jN];
581
+// 						FILE_LOG(logWARNING) << "A["<<iN<<"]["<<jN<<"] = " << A[iN][jN];
582
+// 						FILE_LOG(logWARNING) << "tdensities["<<t+1<<"]["<<jN<<"] = " << tdensities[t+1][jN];
583
+// 					}
584
+// 					FILE_LOG(logWARNING) << "this->scalefactoralpha[t]["<<t<<"] = "<<this->scalefactoralpha[t] << ", tdensities = "<<densities[t][iN];
585
+// 					FILE_LOG(logWARNING) << "scalebeta["<<iN<<"]["<<t<<"] = " << scalebeta[t][iN];
586
+// 					exit(1);
587
+// 				}
588
+// 			}
589
+// 		}
590
+// 
591
+// 	}
592 592
 
593 593
 	dtime = clock() - time;
594 594
 	FILE_LOG(logDEBUG) << "backward(): " << dtime << " clicks";
... ...
@@ -641,8 +641,8 @@ void ScaleHMM::calc_sumxi()
641 641
 	}	
642 642
 
643 643
 	// Compute the sumxi
644
-	if (not this->use_tdens)
645
-	{
644
+// 	if (not this->use_tdens)
645
+// 	{
646 646
 
647 647
 		#pragma omp parallel for
648 648
 		for (int iN=0; iN<this->N; iN++)
... ...
@@ -658,25 +658,25 @@ void ScaleHMM::calc_sumxi()
658 658
 			}
659 659
 		}
660 660
 
661
-	}
662
-	else if (this->use_tdens)
663
-	{
664
-
665
-		#pragma omp parallel for
666
-		for (int iN=0; iN<this->N; iN++)
667
-		{
668
-			FILE_LOG(logDEBUG3) << "Calculating sumxi["<<iN<<"][jN]";
669
-			for (int t=0; t<this->T-1; t++)
670
-			{
671
-				for (int jN=0; jN<this->N; jN++)
672
-				{
673
-					xi = this->scalealpha[t][iN] * this->A[iN][jN] * this->tdensities[t+1][jN] * this->scalebeta[t+1][jN];
674
-					this->sumxi[iN][jN] += xi;
675
-				}
676
-			}
677
-		}
678
-
679
-	}
661
+// 	}
662
+// 	else if (this->use_tdens)
663
+// 	{
664
+// 
665
+// 		#pragma omp parallel for
666
+// 		for (int iN=0; iN<this->N; iN++)
667
+// 		{
668
+// 			FILE_LOG(logDEBUG3) << "Calculating sumxi["<<iN<<"][jN]";
669
+// 			for (int t=0; t<this->T-1; t++)
670
+// 			{
671
+// 				for (int jN=0; jN<this->N; jN++)
672
+// 				{
673
+// 					xi = this->scalealpha[t][iN] * this->A[iN][jN] * this->tdensities[t+1][jN] * this->scalebeta[t+1][jN];
674
+// 					this->sumxi[iN][jN] += xi;
675
+// 				}
676
+// 			}
677
+// 		}
678
+// 
679
+// 	}
680 680
 
681 681
 	dtime = clock() - time;
682 682
 	FILE_LOG(logDEBUG) << "calc_sumxi(): " << dtime << " clicks";
... ...
@@ -708,18 +708,18 @@ void ScaleHMM::calc_densities()
708 708
 		this->densityFunctions[iN]->calc_densities(this->densities[iN]);
709 709
 	}
710 710
 
711
-	if (this->use_tdens)
712
-	{
713
-
714
-		for (int t=0; t<this->T; t++)
715
-		{
716
-			for (int iN=0; iN<this->N; iN++)
717
-			{
718
-				this->tdensities[t][iN] = this->densities[iN][t];
719
-			}
720
-		}
721
-
722
-	}
711
+// 	if (this->use_tdens)
712
+// 	{
713
+// 
714
+// 		for (int t=0; t<this->T; t++)
715
+// 		{
716
+// 			for (int iN=0; iN<this->N; iN++)
717
+// 			{
718
+// 				this->tdensities[t][iN] = this->densities[iN][t];
719
+// 			}
720
+// 		}
721
+// 
722
+// 	}
723 723
 
724 724
 	dtime = clock() - time;
725 725
 	FILE_LOG(logDEBUG) << "calc_densities(): " << dtime << " clicks";
... ...
@@ -46,7 +46,7 @@ class ScaleHMM  {
46 46
 		double** scalealpha; ///< matrix [T x N] of forward probabilities
47 47
 		double** scalebeta; ///<  matrix [T x N] of backward probabilities
48 48
 		double** densities; ///< matrix [N x T] of density values
49
-		double** tdensities; ///< matrix [T x N] of density values, for use in multivariate
49
+// 		double** tdensities; ///< matrix [T x N] of density values, for use in multivariate !increases speed, but on cost of RAM usage and that seems to be limiting
50 50
 		double* sumgamma; ///< vector[N] of sum of posteriors (gamma values)
51 51
 		double** sumxi; ///< matrix[N x N] of xi values
52 52
 		double** gamma; ///< matrix[N x T] of posteriors
... ...
@@ -55,7 +55,7 @@ class ScaleHMM  {
55 55
 		int baumWelchTime_real; ///< elapsed time from start of the 0th iteration
56 56
 		int sumdiff_state_last; ///< sum of the difference in the state 1 assignments from one iteration to the next
57 57
 		double sumdiff_posterior; ///< sum of the difference in posterior (gamma) values from one iteration to the next
58
-		bool use_tdens; ///< switch for using the tdensities in the calculations
58
+// 		bool use_tdens; ///< switch for using the tdensities in the calculations
59 59
 
60 60
 		// Methods
61 61
 		void forward(); ///< calculate forward variables (alpha)