Browse code

stepsize runs with Aneufinder(strandseq=TRUE)

chakalakka authored on 26/05/2017 11:27:19
Showing1 changed files
... ...
@@ -214,7 +214,7 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
214 214
 // =====================================================================================================================================================
215 215
 // This function takes parameters from R, creates a multivariate HMM object, runs the EM and returns the result to R.
216 216
 // =====================================================================================================================================================
217
-void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity)
217
+void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity)
218 218
 {
219 219
 
220 220
 	// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
... ...
@@ -330,6 +330,7 @@ void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, in
330 330
 		}
331 331
 		ind_max = std::distance(posterior_per_t.begin(), std::max_element(posterior_per_t.begin(), posterior_per_t.end()));
332 332
 		states[t] = comb_states[ind_max];
333
+		maxPosterior[t] = posterior_per_t[ind_max];
333 334
 	}
334 335
 	
335 336
 	//FILE_LOG(logDEBUG1) << "Return parameters";
Browse code

Aneufinder with stepsize

chakalakka authored on 17/05/2017 12:38:47
Showing1 changed files
... ...
@@ -9,7 +9,7 @@ static double** multiD;
9 9
 // ===================================================================================================================================================
10 10
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the EM and returns the result to R.
11 11
 // ===================================================================================================================================================
12
-void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
12
+void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm, int* verbosity)
13 13
 {
14 14
 
15 15
 	// Define logging level
... ...
@@ -23,34 +23,34 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
23 23
 
24 24
 	// Print some information
25 25
 	//FILE_LOG(logINFO) << "number of states = " << *N;
26
-	Rprintf("number of states = %d\n", *N);
26
+	if (*verbosity>=1) Rprintf("number of states = %d\n", *N);
27 27
 	//FILE_LOG(logINFO) << "number of bins = " << *T;
28
-	Rprintf("number of bins = %d\n", *T);
28
+	if (*verbosity>=1) Rprintf("number of bins = %d\n", *T);
29 29
 	if (*maxiter < 0)
30 30
 	{
31 31
 		//FILE_LOG(logINFO) << "maximum number of iterations = none";
32
-		Rprintf("maximum number of iterations = none\n");
32
+		if (*verbosity>=1) Rprintf("maximum number of iterations = none\n");
33 33
 	} else {
34 34
 		//FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
35
-		Rprintf("maximum number of iterations = %d\n", *maxiter);
35
+		if (*verbosity>=1) Rprintf("maximum number of iterations = %d\n", *maxiter);
36 36
 	}
37 37
 	if (*maxtime < 0)
38 38
 	{
39 39
 		//FILE_LOG(logINFO) << "maximum running time = none";
40
-		Rprintf("maximum running time = none\n");
40
+		if (*verbosity>=1) Rprintf("maximum running time = none\n");
41 41
 	} else {
42 42
 		//FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
43
-		Rprintf("maximum running time = %d sec\n", *maxtime);
43
+		if (*verbosity>=1) Rprintf("maximum running time = %d sec\n", *maxtime);
44 44
 	}
45 45
 	//FILE_LOG(logINFO) << "epsilon = " << *eps;
46
-	Rprintf("epsilon = %g\n", *eps);
46
+	if (*verbosity>=1) Rprintf("epsilon = %g\n", *eps);
47 47
 
48 48
 	//FILE_LOG(logDEBUG3) << "observation vector";
49 49
 	for (int t=0; t<50; t++) {
50 50
 		//FILE_LOG(logDEBUG3) << "O["<<t<<"] = " << O[t];
51 51
 	}
52 52
 
53
-	// Flush Rprintf statements to console
53
+	// Flush if (*verbosity>=1) Rprintf statements to console
54 54
 	R_FlushConsole();
55 55
 
56 56
 	// Create the HMM
... ...
@@ -75,7 +75,7 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
75 75
 	}
76 76
 	variance = variance / *T;
77 77
 	//FILE_LOG(logINFO) << "data mean = " << mean << ", data variance = " << variance;		
78
-	Rprintf("data mean = %g, data variance = %g\n", mean, variance);		
78
+	if (*verbosity>=1) Rprintf("data mean = %g, data variance = %g\n", mean, variance);		
79 79
 	
80 80
 	// Create the emission densities and initialize
81 81
 	for (int i_state=0; i_state<*N; i_state++)
... ...
@@ -112,7 +112,7 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
112 112
 		}
113 113
 	}
114 114
 
115
-	// Flush Rprintf statements to console
115
+	// Flush if (*verbosity>=1) Rprintf statements to console
116 116
 	R_FlushConsole();
117 117
 
118 118
 	// Do the EM to estimate the parameters
... ...
@@ -132,7 +132,7 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
132 132
 	catch (std::exception& e)
133 133
 	{
134 134
 		//FILE_LOG(logERROR) << "Error in EM/baumWelch: " << e.what();
135
-		Rprintf("Error in EM/baumWelch: %s\n", e.what());
135
+		if (*verbosity>=1) Rprintf("Error in EM/baumWelch: %s\n", e.what());
136 136
 		if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
137 137
 		else { *error = 2; }
138 138
 	}
... ...
@@ -214,7 +214,7 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
214 214
 // =====================================================================================================================================================
215 215
 // This function takes parameters from R, creates a multivariate HMM object, runs the EM and returns the result to R.
216 216
 // =====================================================================================================================================================
217
-void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm)
217
+void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity)
218 218
 {
219 219
 
220 220
 	// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
... ...
@@ -229,31 +229,31 @@ void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, in
229 229
 
230 230
 	// Print some information
231 231
 	//FILE_LOG(logINFO) << "number of states = " << *N;
232
-	Rprintf("number of states = %d\n", *N);
232
+	if (*verbosity>=1) Rprintf("number of states = %d\n", *N);
233 233
 	//FILE_LOG(logINFO) << "number of bins = " << *T;
234
-	Rprintf("number of bins = %d\n", *T);
234
+	if (*verbosity>=1) Rprintf("number of bins = %d\n", *T);
235 235
 	if (*maxiter < 0)
236 236
 	{
237 237
 		//FILE_LOG(logINFO) << "maximum number of iterations = none";
238
-		Rprintf("maximum number of iterations = none\n");
238
+		if (*verbosity>=1) Rprintf("maximum number of iterations = none\n");
239 239
 	} else {
240 240
 		//FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
241
-		Rprintf("maximum number of iterations = %d\n", *maxiter);
241
+		if (*verbosity>=1) Rprintf("maximum number of iterations = %d\n", *maxiter);
242 242
 	}
243 243
 	if (*maxtime < 0)
244 244
 	{
245 245
 		//FILE_LOG(logINFO) << "maximum running time = none";
246
-		Rprintf("maximum running time = none\n");
246
+		if (*verbosity>=1) Rprintf("maximum running time = none\n");
247 247
 	} else {
248 248
 		//FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
249
-		Rprintf("maximum running time = %d sec\n", *maxtime);
249
+		if (*verbosity>=1) Rprintf("maximum running time = %d sec\n", *maxtime);
250 250
 	}
251 251
 	//FILE_LOG(logINFO) << "epsilon = " << *eps;
252
-	Rprintf("epsilon = %g\n", *eps);
252
+	if (*verbosity>=1) Rprintf("epsilon = %g\n", *eps);
253 253
 	//FILE_LOG(logINFO) << "number of modifications = " << *Nmod;
254
-	Rprintf("number of modifications = %d\n", *Nmod);
254
+	if (*verbosity>=1) Rprintf("number of modifications = %d\n", *Nmod);
255 255
 
256
-	// Flush Rprintf statements to console
256
+	// Flush if (*verbosity>=1) Rprintf statements to console
257 257
 	R_FlushConsole();
258 258
 
259 259
 	// Recode the densities vector to matrix representation
... ...
@@ -303,7 +303,7 @@ void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, in
303 303
 	catch (std::exception& e)
304 304
 	{
305 305
 		//FILE_LOG(logERROR) << "Error in EM/baumWelch: " << e.what();
306
-		Rprintf("Error in EM/baumWelch: %s\n", e.what());
306
+		if (*verbosity>=1) Rprintf("Error in EM/baumWelch: %s\n", e.what());
307 307
 		if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
308 308
 		else { *error = 2; }
309 309
 	}
... ...
@@ -379,10 +379,10 @@ void array2D_which_max(double* array2D, int* dim, int* ind_max, double* value_ma
379 379
     for (int i1=0; i1<dim[1]; i1++)
380 380
     {
381 381
 			value_per_i0[i1] = array2D[i1 * dim[0] + i0];
382
-      // Rprintf("i0=%d, i1=%d, value_per_i0[%d] = %g\n", i0, i1, i1, value_per_i0[i1]);
382
+      // if (*verbosity>=1) Rprintf("i0=%d, i1=%d, value_per_i0[%d] = %g\n", i0, i1, i1, value_per_i0[i1]);
383 383
     }
384 384
 		ind_max[i0] = 1 + std::distance(value_per_i0.begin(), std::max_element(value_per_i0.begin(), value_per_i0.end()));
385 385
     value_max[i0] = *std::max_element(value_per_i0.begin(), value_per_i0.end());
386 386
   }
387 387
 	
388
-}
389 388
\ No newline at end of file
389
+}
Browse code

univariate.findCNVs with stepsize

chakalakka authored on 17/05/2017 11:38:38
Showing1 changed files
... ...
@@ -9,7 +9,7 @@ static double** multiD;
9 9
 // ===================================================================================================================================================
10 10
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the EM and returns the result to R.
11 11
 // ===================================================================================================================================================
12
-void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
12
+void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
13 13
 {
14 14
 
15 15
 	// Define logging level
... ...
@@ -137,16 +137,16 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
137 137
 		else { *error = 2; }
138 138
 	}
139 139
 
140
-// 	// Compute the posteriors and save results directly to the R pointer
141
-// 	//FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
142
-// 	#pragma omp parallel for
143
-// 	for (int iN=0; iN<*N; iN++)
144
-// 	{
145
-// 		for (int t=0; t<*T; t++)
146
-// 		{
147
-// 			posteriors[t + iN * (*T)] = hmm->get_posterior(iN, t);
148
-// 		}
149
-// 	}
140
+	// // Compute the posteriors and save results directly to the R pointer
141
+	// //FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
142
+	// #pragma omp parallel for
143
+	// for (int iN=0; iN<*N; iN++)
144
+	// {
145
+	// 	for (int t=0; t<*T; t++)
146
+	// 	{
147
+	// 		posteriors[t + iN * (*T)] = hmm->get_posterior(iN, t);
148
+	// 	}
149
+	// }
150 150
 
151 151
 	// Compute the states from posteriors
152 152
 	//FILE_LOG(logDEBUG1) << "Computing states from posteriors";
... ...
@@ -160,6 +160,7 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou
160 160
 		}
161 161
 		ind_max = std::distance(posterior_per_t.begin(), std::max_element(posterior_per_t.begin(), posterior_per_t.end()));
162 162
 		states[t] = state_labels[ind_max];
163
+		maxPosterior[t] = posterior_per_t[ind_max];
163 164
 	}
164 165
 
165 166
 	//FILE_LOG(logDEBUG1) << "Return parameters";
... ...
@@ -365,3 +366,23 @@ void multivariate_cleanup(int* N)
365 366
 	FreeDoubleMatrix(multiD, *N);
366 367
 }
367 368
 
369
+
370
+// ====================================================================
371
+// C version of apply(array2D, 1, which.max) and apply(array2D, 1, max)
372
+// ====================================================================
373
+void array2D_which_max(double* array2D, int* dim, int* ind_max, double* value_max)
374
+{
375
+  // array2D is actually a vector, but is intended to originate from a 2D array in R
376
+	std::vector<double> value_per_i0(dim[1]);
377
+  for (int i0=0; i0<dim[0]; i0++)
378
+  {
379
+    for (int i1=0; i1<dim[1]; i1++)
380
+    {
381
+			value_per_i0[i1] = array2D[i1 * dim[0] + i0];
382
+      // Rprintf("i0=%d, i1=%d, value_per_i0[%d] = %g\n", i0, i1, i1, value_per_i0[i1]);
383
+    }
384
+		ind_max[i0] = 1 + std::distance(value_per_i0.begin(), std::max_element(value_per_i0.begin(), value_per_i0.end()));
385
+    value_max[i0] = *std::max_element(value_per_i0.begin(), value_per_i0.end());
386
+  }
387
+	
388
+}
368 389
\ No newline at end of file
Browse code

fixed BiocCheck Recommended issues

chakalakka authored on 15/03/2016 16:50:38
Showing1 changed files
... ...
@@ -9,8 +9,7 @@ static double** multiD;
9 9
 // ===================================================================================================================================================
10 10
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the EM and returns the result to R.
11 11
 // ===================================================================================================================================================
12
-extern "C" {
13
-void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
12
+void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
14 13
 {
15 14
 
16 15
 	// Define logging level
... ...
@@ -209,14 +208,12 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
209 208
 	delete hmm;
210 209
 	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
211 210
 }
212
-} //extern
213 211
 
214 212
 
215 213
 // =====================================================================================================================================================
216 214
 // This function takes parameters from R, creates a multivariate HMM object, runs the EM and returns the result to R.
217 215
 // =====================================================================================================================================================
218
-extern "C" {
219
-void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm)
216
+void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm)
220 217
 {
221 218
 
222 219
 	// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
... ...
@@ -351,25 +348,20 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
351 348
 	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
352 349
 // 	FreeDoubleMatrix(multiD, *N);
353 350
 }
354
-} //extern
355 351
 
356 352
 
357 353
 // =======================================================
358 354
 // This function make a cleanup if anything was left over
359 355
 // =======================================================
360
-extern "C" {
361
-void R_univariate_cleanup()
356
+void univariate_cleanup()
362 357
 {
363 358
 // 	//FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__; // This message will be shown if interrupt happens before start of C-code
364 359
 	delete hmm;
365 360
 }
366
-} //extern
367 361
 
368
-extern "C" {
369
-void R_multivariate_cleanup(int* N)
362
+void multivariate_cleanup(int* N)
370 363
 {
371 364
 	delete hmm;
372 365
 	FreeDoubleMatrix(multiD, *N);
373 366
 }
374
-} //extern
375 367
 
Browse code

removed C++ parallelization

chakalakka authored on 02/03/2016 10:16:24
Showing1 changed files
... ...
@@ -20,7 +20,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
20 20
 //  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
21 21
 
22 22
 // 	// Parallelization settings
23
-	omp_set_num_threads(*num_threads);
23
+// 	omp_set_num_threads(*num_threads);
24 24
 
25 25
 	// Print some information
26 26
 	//FILE_LOG(logINFO) << "number of states = " << *N;
... ...
@@ -227,7 +227,7 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
227 227
 //  	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
228 228
 
229 229
 	// Parallelization settings
230
-	omp_set_num_threads(*num_threads);
230
+// 	omp_set_num_threads(*num_threads);
231 231
 
232 232
 	// Print some information
233 233
 	//FILE_LOG(logINFO) << "number of states = " << *N;
Browse code

added variable-width bins, timedMessage, simulateReads

chakalakka authored on 17/02/2016 09:09:42
Showing1 changed files
... ...
@@ -1,11 +1,7 @@
1 1
 
2 2
 
3 3
 
4
-#include "utility.h"
5
-#include "scalehmm.h"
6
-#include "loghmm.h"
7
-#include <omp.h> // parallelization options
8
-#include <string> // strcmp
4
+#include "R_interface.h"
9 5
 
10 6
 static ScaleHMM* hmm; // declare as static outside the function because we only need one and this enables memory-cleanup on R_CheckUserInterrupt()
11 7
 static double** multiD;
... ...
@@ -213,7 +209,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
213 209
 	delete hmm;
214 210
 	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
215 211
 }
216
-} // extern C
212
+} //extern
217 213
 
218 214
 
219 215
 // =====================================================================================================================================================
... ...
@@ -355,7 +351,7 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
355 351
 	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
356 352
 // 	FreeDoubleMatrix(multiD, *N);
357 353
 }
358
-} // extern C
354
+} //extern
359 355
 
360 356
 
361 357
 // =======================================================
... ...
@@ -367,7 +363,7 @@ void R_univariate_cleanup()
367 363
 // 	//FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__; // This message will be shown if interrupt happens before start of C-code
368 364
 	delete hmm;
369 365
 }
370
-}
366
+} //extern
371 367
 
372 368
 extern "C" {
373 369
 void R_multivariate_cleanup(int* N)
... ...
@@ -375,5 +371,5 @@ void R_multivariate_cleanup(int* N)
375 371
 	delete hmm;
376 372
 	FreeDoubleMatrix(multiD, *N);
377 373
 }
378
-}
374
+} //extern
379 375
 
Browse code

improved SCE calling and added baumWelch/EM option

chakalakka authored on 11/10/2015 10:31:37
Showing1 changed files
... ...
@@ -11,10 +11,10 @@ static ScaleHMM* hmm; // declare as static outside the function because we only
11 11
 static double** multiD;
12 12
 
13 13
 // ===================================================================================================================================================
14
-// This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
14
+// This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the EM and returns the result to R.
15 15
 // ===================================================================================================================================================
16 16
 extern "C" {
17
-void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
17
+void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm)
18 18
 {
19 19
 
20 20
 	// Define logging level
... ...
@@ -120,21 +120,27 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
120 120
 	// Flush Rprintf statements to console
121 121
 	R_FlushConsole();
122 122
 
123
-	// Do the Baum-Welch to estimate the parameters
124
-	//FILE_LOG(logDEBUG1) << "Starting Baum-Welch estimation";
123
+	// Do the EM to estimate the parameters
125 124
 	try
126 125
 	{
127
-		hmm->baumWelch(maxiter, maxtime, eps);
126
+		if (*algorithm == 1)
127
+		{
128
+			hmm->baumWelch();
129
+		}
130
+		else if (*algorithm == 3)
131
+		{
132
+			//FILE_LOG(logDEBUG1) << "Starting EM estimation";
133
+			hmm->EM(maxiter, maxtime, eps);
134
+			//FILE_LOG(logDEBUG1) << "Finished with EM estimation";
135
+		}
128 136
 	}
129 137
 	catch (std::exception& e)
130 138
 	{
131
-		//FILE_LOG(logERROR) << "Error in Baum-Welch: " << e.what();
132
-		Rprintf("Error in Baum-Welch: %s\n", e.what());
139
+		//FILE_LOG(logERROR) << "Error in EM/baumWelch: " << e.what();
140
+		Rprintf("Error in EM/baumWelch: %s\n", e.what());
133 141
 		if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
134 142
 		else { *error = 2; }
135 143
 	}
136
-		
137
-	//FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
138 144
 
139 145
 // 	// Compute the posteriors and save results directly to the R pointer
140 146
 // 	//FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
... ...
@@ -211,10 +217,10 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
211 217
 
212 218
 
213 219
 // =====================================================================================================================================================
214
-// This function takes parameters from R, creates a multivariate HMM object, runs the Baum-Welch and returns the result to R.
220
+// This function takes parameters from R, creates a multivariate HMM object, runs the EM and returns the result to R.
215 221
 // =====================================================================================================================================================
216 222
 extern "C" {
217
-void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error)
223
+void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm)
218 224
 {
219 225
 
220 226
 	// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
... ...
@@ -286,20 +292,27 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
286 292
 // 		}
287 293
 // 	}
288 294
 
289
-	// Estimate the parameters
290
-	//FILE_LOG(logDEBUG1) << "Starting Baum-Welch estimation";
295
+	// Do the EM to estimate the parameters
291 296
 	try
292 297
 	{
293
-		hmm->baumWelch(maxiter, maxtime, eps);
298
+		if (*algorithm == 1)
299
+		{
300
+			hmm->baumWelch();
301
+		}
302
+		else if (*algorithm == 3)
303
+		{
304
+			//FILE_LOG(logDEBUG1) << "Starting EM estimation";
305
+			hmm->EM(maxiter, maxtime, eps);
306
+			//FILE_LOG(logDEBUG1) << "Finished with EM estimation";
307
+		}
294 308
 	}
295 309
 	catch (std::exception& e)
296 310
 	{
297
-		//FILE_LOG(logERROR) << "Error in Baum-Welch: " << e.what();
298
-		Rprintf("Error in Baum-Welch: %s\n", e.what());
311
+		//FILE_LOG(logERROR) << "Error in EM/baumWelch: " << e.what();
312
+		Rprintf("Error in EM/baumWelch: %s\n", e.what());
299 313
 		if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
300 314
 		else { *error = 2; }
301 315
 	}
302
-	//FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
303 316
 	
304 317
 // 	// Compute the posteriors and save results directly to the R pointer
305 318
 // 	//FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
Browse code

bugfix usage

chakalakka authored on 23/09/2015 12:42:18
Showing1 changed files
... ...
@@ -1,18 +1,3 @@
1
-//aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
-//Copyright (C) 2015  Aaron Taudt
3
-//
4
-//This program is free software: you can redistribute it and/or modify
5
-//it under the terms of the GNU General Public License as published by
6
-//the Free Software Foundation, either version 3 of the License, or
7
-//(at your option) any later version.
8
-//
9
-//This program is distributed in the hope that it will be useful,
10
-//but WITHOUT ANY WARRANTY; without even the implied warranty of
11
-//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
-//GNU General Public License for more details.
13
-//
14
-//You should have received a copy of the GNU General Public License
15
-//along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 1
 
17 2
 
18 3
 
Browse code

adjusted licensing to GPLv3

chakalakka authored on 19/09/2015 14:26:38
Showing1 changed files
... ...
@@ -1,3 +1,21 @@
1
+//aneufinder - An R-package for CNV detection in whole-genome single cell sequencing data
2
+//Copyright (C) 2015  Aaron Taudt
3
+//
4
+//This program is free software: you can redistribute it and/or modify
5
+//it under the terms of the GNU General Public License as published by
6
+//the Free Software Foundation, either version 3 of the License, or
7
+//(at your option) any later version.
8
+//
9
+//This program is distributed in the hope that it will be useful,
10
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+//GNU General Public License for more details.
13
+//
14
+//You should have received a copy of the GNU General Public License
15
+//along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
+
17
+
18
+
1 19
 #include "utility.h"
2 20
 #include "scalehmm.h"
3 21
 #include "loghmm.h"
Browse code

added findSCEs

chakalakka authored on 28/04/2015 09:51:45
Showing1 changed files
... ...
@@ -11,7 +11,7 @@ static double** multiD;
11 11
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
12 12
 // ===================================================================================================================================================
13 13
 extern "C" {
14
-void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, double* lambda, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_lambda, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
14
+void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
15 15
 {
16 16
 
17 17
 	// Define logging level
... ...
@@ -101,12 +101,6 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
101 101
 			hmm->densityFunctions.push_back(d);
102 102
 		}
103 103
 		else if (distr_type[i_state] == 4)
104
-		{
105
-			//FILE_LOG(logDEBUG1) << "Using poisson for state " << i_state;
106
-			Poisson *d = new Poisson(O, *T, initial_lambda[i_state]); // delete is done inside ~ScaleHMM()
107
-			hmm->densityFunctions.push_back(d);
108
-		}
109
-		else if (distr_type[i_state] == 5)
110 104
 		{
111 105
 			//FILE_LOG(logDEBUG1) << "Using binomial for state " << i_state;
112 106
 			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
... ...
@@ -196,11 +190,6 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
196 190
 			size[i] = 0;
197 191
 			prob[i] = 1;
198 192
 		}
199
-		else if (hmm->densityFunctions[i]->get_name() == POISSON)
200
-		{
201
-			Poisson* d = (Poisson*)(hmm->densityFunctions[i]);
202
-			lambda[i] = d->get_lambda();
203
-		}
204 193
 		else if (hmm->densityFunctions[i]->get_name() == BINOMIAL) 
205 194
 		{
206 195
 			Binomial* d = (Binomial*)(hmm->densityFunctions[i]);
Browse code

compile is passed without warnings

chakalakka authored on 16/02/2015 09:50:42
Showing1 changed files
... ...
@@ -1,59 +1,63 @@
1 1
 #include "utility.h"
2 2
 #include "scalehmm.h"
3 3
 #include "loghmm.h"
4
-// #include <omp.h> // parallelization options
4
+#include <omp.h> // parallelization options
5
+#include <string> // strcmp
6
+
7
+static ScaleHMM* hmm; // declare as static outside the function because we only need one and this enables memory-cleanup on R_CheckUserInterrupt()
8
+static double** multiD;
5 9
 
6 10
 // ===================================================================================================================================================
7 11
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
8 12
 // ===================================================================================================================================================
9 13
 extern "C" {
10
-void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, double* lambda, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_lambda, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
14
+void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, double* lambda, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_lambda, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
11 15
 {
12 16
 
13 17
 	// Define logging level
14 18
 // 	FILE* pFile = fopen("chromStar.log", "w");
15 19
 // 	Output2FILE::Stream() = pFile;
16
- 	FILELog::ReportingLevel() = FILELog::FromString("NONE");
20
+//  	FILELog::ReportingLevel() = FILELog::FromString("NONE");
17 21
 //  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
18 22
 
19 23
 // 	// Parallelization settings
20
-// 	omp_set_num_threads(*num_threads);
24
+	omp_set_num_threads(*num_threads);
21 25
 
22 26
 	// Print some information
23
-	FILE_LOG(logINFO) << "number of states = " << *N;
27
+	//FILE_LOG(logINFO) << "number of states = " << *N;
24 28
 	Rprintf("number of states = %d\n", *N);
25
-	FILE_LOG(logINFO) << "number of bins = " << *T;
29
+	//FILE_LOG(logINFO) << "number of bins = " << *T;
26 30
 	Rprintf("number of bins = %d\n", *T);
27 31
 	if (*maxiter < 0)
28 32
 	{
29
-		FILE_LOG(logINFO) << "maximum number of iterations = none";
33
+		//FILE_LOG(logINFO) << "maximum number of iterations = none";
30 34
 		Rprintf("maximum number of iterations = none\n");
31 35
 	} else {
32
-		FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
36
+		//FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
33 37
 		Rprintf("maximum number of iterations = %d\n", *maxiter);
34 38
 	}
35 39
 	if (*maxtime < 0)
36 40
 	{
37
-		FILE_LOG(logINFO) << "maximum running time = none";
41
+		//FILE_LOG(logINFO) << "maximum running time = none";
38 42
 		Rprintf("maximum running time = none\n");
39 43
 	} else {
40
-		FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
44
+		//FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
41 45
 		Rprintf("maximum running time = %d sec\n", *maxtime);
42 46
 	}
43
-	FILE_LOG(logINFO) << "epsilon = " << *eps;
47
+	//FILE_LOG(logINFO) << "epsilon = " << *eps;
44 48
 	Rprintf("epsilon = %g\n", *eps);
45 49
 
46
-	FILE_LOG(logDEBUG3) << "observation vector";
50
+	//FILE_LOG(logDEBUG3) << "observation vector";
47 51
 	for (int t=0; t<50; t++) {
48
-		FILE_LOG(logDEBUG3) << "O["<<t<<"] = " << O[t];
52
+		//FILE_LOG(logDEBUG3) << "O["<<t<<"] = " << O[t];
49 53
 	}
50 54
 
51 55
 	// Flush Rprintf statements to console
52 56
 	R_FlushConsole();
53 57
 
54 58
 	// Create the HMM
55
-	FILE_LOG(logDEBUG1) << "Creating a univariate HMM";
56
-	ScaleHMM* hmm = new ScaleHMM(*T, *N);
59
+	//FILE_LOG(logDEBUG1) << "Creating a univariate HMM";
60
+	hmm = new ScaleHMM(*T, *N);
57 61
 // 	LogHMM* hmm = new LogHMM(*T, *N);
58 62
 	hmm->set_cutoff(*read_cutoff);
59 63
 	// Initialize the transition probabilities and proba
... ...
@@ -72,7 +76,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
72 76
 		variance+= pow(O[t] - mean, 2);
73 77
 	}
74 78
 	variance = variance / *T;
75
-	FILE_LOG(logINFO) << "data mean = " << mean << ", data variance = " << variance;		
79
+	//FILE_LOG(logINFO) << "data mean = " << mean << ", data variance = " << variance;		
76 80
 	Rprintf("data mean = %g, data variance = %g\n", mean, variance);		
77 81
 	
78 82
 	// Create the emission densities and initialize
... ...
@@ -80,37 +84,37 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
80 84
 	{
81 85
 		if (distr_type[i_state] == 1)
82 86
 		{
83
-			FILE_LOG(logDEBUG1) << "Using delta distribution for state " << i_state;
87
+			//FILE_LOG(logDEBUG1) << "Using delta distribution for state " << i_state;
84 88
 			ZeroInflation *d = new ZeroInflation(O, *T); // delete is done inside ~ScaleHMM()
85 89
 			hmm->densityFunctions.push_back(d);
86 90
 		}
87 91
 		else if (distr_type[i_state] == 2)
88 92
 		{
89
-			FILE_LOG(logDEBUG1) << "Using geometric distribution for state " << i_state;
93
+			//FILE_LOG(logDEBUG1) << "Using geometric distribution for state " << i_state;
90 94
 			Geometric *d = new Geometric(O, *T, initial_prob[i_state]); // delete is done inside ~ScaleHMM()
91 95
 			hmm->densityFunctions.push_back(d);
92 96
 		}
93 97
 		else if (distr_type[i_state] == 3)
94 98
 		{
95
-			FILE_LOG(logDEBUG1) << "Using negative binomial for state " << i_state;
99
+			//FILE_LOG(logDEBUG1) << "Using negative binomial for state " << i_state;
96 100
 			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
97 101
 			hmm->densityFunctions.push_back(d);
98 102
 		}
99 103
 		else if (distr_type[i_state] == 4)
100 104
 		{
101
-			FILE_LOG(logDEBUG1) << "Using poisson for state " << i_state;
105
+			//FILE_LOG(logDEBUG1) << "Using poisson for state " << i_state;
102 106
 			Poisson *d = new Poisson(O, *T, initial_lambda[i_state]); // delete is done inside ~ScaleHMM()
103 107
 			hmm->densityFunctions.push_back(d);
104 108
 		}
105 109
 		else if (distr_type[i_state] == 5)
106 110
 		{
107
-			FILE_LOG(logDEBUG1) << "Using binomial for state " << i_state;
111
+			//FILE_LOG(logDEBUG1) << "Using binomial for state " << i_state;
108 112
 			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
109 113
 			hmm->densityFunctions.push_back(d);
110 114
 		}
111 115
 		else
112 116
 		{
113
-			FILE_LOG(logWARNING) << "Density not specified, using default negative binomial for state " << i_state;
117
+			//FILE_LOG(logWARNING) << "Density not specified, using default negative binomial for state " << i_state;
114 118
 			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]);
115 119
 			hmm->densityFunctions.push_back(d);
116 120
 		}
... ...
@@ -120,23 +124,23 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
120 124
 	R_FlushConsole();
121 125
 
122 126
 	// Do the Baum-Welch to estimate the parameters
123
-	FILE_LOG(logDEBUG1) << "Starting Baum-Welch estimation";
127
+	//FILE_LOG(logDEBUG1) << "Starting Baum-Welch estimation";
124 128
 	try
125 129
 	{
126 130
 		hmm->baumWelch(maxiter, maxtime, eps);
127 131
 	}
128 132
 	catch (std::exception& e)
129 133
 	{
130
-		FILE_LOG(logERROR) << "Error in Baum-Welch: " << e.what();
134
+		//FILE_LOG(logERROR) << "Error in Baum-Welch: " << e.what();
131 135
 		Rprintf("Error in Baum-Welch: %s\n", e.what());
132
-		if (e.what()=="nan detected") { *error = 1; }
136
+		if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
133 137
 		else { *error = 2; }
134 138
 	}
135 139
 		
136
-	FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
140
+	//FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
137 141
 
138 142
 // 	// Compute the posteriors and save results directly to the R pointer
139
-// 	FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
143
+// 	//FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
140 144
 // 	#pragma omp parallel for
141 145
 // 	for (int iN=0; iN<*N; iN++)
142 146
 // 	{
... ...
@@ -147,18 +151,20 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
147 151
 // 	}
148 152
 
149 153
 	// Compute the states from posteriors
150
-	FILE_LOG(logDEBUG1) << "Computing states from posteriors";
151
-	double posterior_per_t [*N];
154
+	//FILE_LOG(logDEBUG1) << "Computing states from posteriors";
155
+	int ind_max;
156
+	std::vector<double> posterior_per_t(*N);
152 157
 	for (int t=0; t<*T; t++)
153 158
 	{
154 159
 		for (int iN=0; iN<*N; iN++)
155 160
 		{
156 161
 			posterior_per_t[iN] = hmm->get_posterior(iN, t);
157 162
 		}
158
-		states[t] = state_labels[argMax(posterior_per_t, *N)];
163
+		ind_max = std::distance(posterior_per_t.begin(), std::max_element(posterior_per_t.begin(), posterior_per_t.end()));
164
+		states[t] = state_labels[ind_max];
159 165
 	}
160 166
 
161
-	FILE_LOG(logDEBUG1) << "Return parameters";
167
+	//FILE_LOG(logDEBUG1) << "Return parameters";
162 168
 	// also return the estimated transition matrix and the initial probs
163 169
 	for (int i=0; i<*N; i++)
164 170
 	{
... ...
@@ -205,8 +211,9 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
205 211
 	*loglik = hmm->get_logP();
206 212
 	hmm->calc_weights(weights);
207 213
 	
208
-	FILE_LOG(logDEBUG1) << "Deleting the hmm";
214
+	//FILE_LOG(logDEBUG1) << "Deleting the hmm";
209 215
 	delete hmm;
216
+	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
210 217
 }
211 218
 } // extern C
212 219
 
... ...
@@ -223,35 +230,35 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
223 230
 // 	Output2FILE::Stream() = pFile;
224 231
 //  	FILELog::ReportingLevel() = FILELog::FromString("ITERATION");
225 232
 //  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
226
- 	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
233
+//  	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
227 234
 
228 235
 	// Parallelization settings
229
-// 	omp_set_num_threads(*num_threads);
236
+	omp_set_num_threads(*num_threads);
230 237
 
231 238
 	// Print some information
232
-	FILE_LOG(logINFO) << "number of states = " << *N;
239
+	//FILE_LOG(logINFO) << "number of states = " << *N;
233 240
 	Rprintf("number of states = %d\n", *N);
234
-	FILE_LOG(logINFO) << "number of bins = " << *T;
241
+	//FILE_LOG(logINFO) << "number of bins = " << *T;
235 242
 	Rprintf("number of bins = %d\n", *T);
236 243
 	if (*maxiter < 0)
237 244
 	{
238
-		FILE_LOG(logINFO) << "maximum number of iterations = none";
245
+		//FILE_LOG(logINFO) << "maximum number of iterations = none";
239 246
 		Rprintf("maximum number of iterations = none\n");
240 247
 	} else {
241
-		FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
248
+		//FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
242 249
 		Rprintf("maximum number of iterations = %d\n", *maxiter);
243 250
 	}
244 251
 	if (*maxtime < 0)
245 252
 	{
246
-		FILE_LOG(logINFO) << "maximum running time = none";
253
+		//FILE_LOG(logINFO) << "maximum running time = none";
247 254
 		Rprintf("maximum running time = none\n");
248 255
 	} else {
249
-		FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
256
+		//FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
250 257
 		Rprintf("maximum running time = %d sec\n", *maxtime);
251 258
 	}
252
-	FILE_LOG(logINFO) << "epsilon = " << *eps;
259
+	//FILE_LOG(logINFO) << "epsilon = " << *eps;
253 260
 	Rprintf("epsilon = %g\n", *eps);
254
-	FILE_LOG(logINFO) << "number of modifications = " << *Nmod;
261
+	//FILE_LOG(logINFO) << "number of modifications = " << *Nmod;
255 262
 	Rprintf("number of modifications = %d\n", *Nmod);
256 263
 
257 264
 	// Flush Rprintf statements to console
... ...
@@ -259,7 +266,7 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
259 266
 
260 267
 	// Recode the densities vector to matrix representation
261 268
 // 	clock_t clocktime = clock(), dtime;
262
-	double** multiD = allocDoubleMatrix(*N, *T);
269
+	multiD = CallocDoubleMatrix(*N, *T);
263 270
 	for (int iN=0; iN<*N; iN++)
264 271
 	{
265 272
 		for (int t=0; t<*T; t++)
... ...
@@ -268,11 +275,11 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
268 275
 		}
269 276
 	}
270 277
 // 	dtime = clock() - clocktime;
271
-// 	FILE_LOG(logDEBUG1) << "recoding densities vector to matrix representation: " << dtime << " clicks";
278
+// 	//FILE_LOG(logDEBUG1) << "recoding densities vector to matrix representation: " << dtime << " clicks";
272 279
 
273 280
 	// Create the HMM
274
-	FILE_LOG(logDEBUG1) << "Creating the multivariate HMM";
275
-	ScaleHMM* hmm = new ScaleHMM(*T, *N, *Nmod, multiD);
281
+	//FILE_LOG(logDEBUG1) << "Creating the multivariate HMM";
282
+	hmm = new ScaleHMM(*T, *N, *Nmod, multiD);
276 283
 	// Initialize the transition probabilities and proba
277 284
 	hmm->initialize_transition_probs(initial_A, *use_initial_params);
278 285
 	hmm->initialize_proba(initial_proba, *use_initial_params);
... ...
@@ -280,30 +287,30 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
280 287
 	// Print logproba and A
281 288
 // 	for (int iN=0; iN<*N; iN++)
282 289
 // 	{
283
-// 		FILE_LOG(logDEBUG) << "proba["<<iN<<"] = " <<exp(hmm->logproba[iN]);
290
+// 		//FILE_LOG(logDEBUG) << "proba["<<iN<<"] = " <<exp(hmm->logproba[iN]);
284 291
 // 		for (int jN=0; jN<*N; jN++)
285 292
 // 		{
286
-// 			FILE_LOG(logDEBUG) << "A["<<iN<<"]["<<jN<<"] = " << hmm->A[iN][jN];
293
+// 			//FILE_LOG(logDEBUG) << "A["<<iN<<"]["<<jN<<"] = " << hmm->A[iN][jN];
287 294
 // 		}
288 295
 // 	}
289 296
 
290 297
 	// Estimate the parameters
291
-	FILE_LOG(logDEBUG1) << "Starting Baum-Welch estimation";
298
+	//FILE_LOG(logDEBUG1) << "Starting Baum-Welch estimation";
292 299
 	try
293 300
 	{
294 301
 		hmm->baumWelch(maxiter, maxtime, eps);
295 302
 	}
296 303
 	catch (std::exception& e)
297 304
 	{
298
-		FILE_LOG(logERROR) << "Error in Baum-Welch: " << e.what();
305
+		//FILE_LOG(logERROR) << "Error in Baum-Welch: " << e.what();
299 306
 		Rprintf("Error in Baum-Welch: %s\n", e.what());
300
-		if (e.what()=="nan detected") { *error = 1; }
307
+		if (strcmp(e.what(),"nan detected")==0) { *error = 1; }
301 308
 		else { *error = 2; }
302 309
 	}
303
-	FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
310
+	//FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
304 311
 	
305 312
 // 	// Compute the posteriors and save results directly to the R pointer
306
-// 	FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
313
+// 	//FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
307 314
 // 	for (int iN=0; iN<*N; iN++)
308 315
 // 	{
309 316
 // 		for (int t=0; t<*T; t++)
... ...
@@ -313,18 +320,20 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
313 320
 // 	}
314 321
 
315 322
 	// Compute the states from posteriors
316
-	FILE_LOG(logDEBUG1) << "Computing states from posteriors";
317
-	double posterior_per_t [*N];
323
+	//FILE_LOG(logDEBUG1) << "Computing states from posteriors";
324
+	int ind_max;
325
+	std::vector<double> posterior_per_t(*N);
318 326
 	for (int t=0; t<*T; t++)
319 327
 	{
320 328
 		for (int iN=0; iN<*N; iN++)
321 329
 		{
322 330
 			posterior_per_t[iN] = hmm->get_posterior(iN, t);
323 331
 		}
324
-		states[t] = comb_states[argMax(posterior_per_t, *N)];
332
+		ind_max = std::distance(posterior_per_t.begin(), std::max_element(posterior_per_t.begin(), posterior_per_t.end()));
333
+		states[t] = comb_states[ind_max];
325 334
 	}
326 335
 	
327
-	FILE_LOG(logDEBUG1) << "Return parameters";
336
+	//FILE_LOG(logDEBUG1) << "Return parameters";
328 337
 	// also return the estimated transition matrix and the initial probs
329 338
 	for (int i=0; i<*N; i++)
330 339
 	{
... ...
@@ -336,9 +345,30 @@ void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states,
336 345
 	}
337 346
 	*loglik = hmm->get_logP();
338 347
 
339
-	FILE_LOG(logDEBUG1) << "Deleting the hmm";
348
+	//FILE_LOG(logDEBUG1) << "Deleting the hmm";
340 349
 	delete hmm;
341
-	freeDoubleMatrix(multiD, *N);
350
+	hmm = NULL; // assign NULL to defuse the additional delete in on.exit() call
351
+// 	FreeDoubleMatrix(multiD, *N);
342 352
 }
343 353
 } // extern C
344 354
 
355
+
356
+// =======================================================
357
+// This function make a cleanup if anything was left over
358
+// =======================================================
359
+extern "C" {
360
+void R_univariate_cleanup()
361
+{
362
+// 	//FILE_LOG(logDEBUG2) << __PRETTY_FUNCTION__; // This message will be shown if interrupt happens before start of C-code
363
+	delete hmm;
364
+}
365
+}
366
+
367
+extern "C" {
368
+void R_multivariate_cleanup(int* N)
369
+{
370
+	delete hmm;
371
+	FreeDoubleMatrix(multiD, *N);
372
+}
373
+}
374
+
Browse code

improved error and warning handling

chakalakka authored on 06/12/2014 11:14:36
Showing1 changed files
... ...
@@ -13,7 +13,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, d
13 13
 	// Define logging level
14 14
 // 	FILE* pFile = fopen("chromStar.log", "w");
15 15
 // 	Output2FILE::Stream() = pFile;
16
- 	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
16
+ 	FILELog::ReportingLevel() = FILELog::FromString("NONE");
17 17
 //  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
18 18
 
19 19
 // 	// Parallelization settings
Browse code

bivariate analysis added

chakalakka authored on 29/10/2014 15:34:33
Showing1 changed files
... ...
@@ -7,7 +7,7 @@
7 7
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
8 8
 // ===================================================================================================================================================
9 9
 extern "C" {
10
-void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, double* lambda, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_lambda, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
10
+void R_univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, double* lambda, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_lambda, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
11 11
 {
12 12
 
13 13
 	// Define logging level
... ...
@@ -155,7 +155,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, double
155 155
 		{
156 156
 			posterior_per_t[iN] = hmm->get_posterior(iN, t);
157 157
 		}
158
-		states[t] = argMax(posterior_per_t, *N);
158
+		states[t] = state_labels[argMax(posterior_per_t, *N)];
159 159
 	}
160 160
 
161 161
 	FILE_LOG(logDEBUG1) << "Return parameters";
... ...
@@ -210,3 +210,135 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, double
210 210
 }
211 211
 } // extern C
212 212
 
213
+
214
+// =====================================================================================================================================================
215
+// This function takes parameters from R, creates a multivariate HMM object, runs the Baum-Welch and returns the result to R.
216
+// =====================================================================================================================================================
217
+extern "C" {
218
+void R_multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error)
219
+{
220
+
221
+	// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
222
+// 	FILE* pFile = fopen("chromStar.log", "w");
223
+// 	Output2FILE::Stream() = pFile;
224
+//  	FILELog::ReportingLevel() = FILELog::FromString("ITERATION");
225
+//  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
226
+ 	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
227
+
228
+	// Parallelization settings
229
+// 	omp_set_num_threads(*num_threads);
230
+
231
+	// Print some information
232
+	FILE_LOG(logINFO) << "number of states = " << *N;
233
+	Rprintf("number of states = %d\n", *N);
234
+	FILE_LOG(logINFO) << "number of bins = " << *T;
235
+	Rprintf("number of bins = %d\n", *T);
236
+	if (*maxiter < 0)
237
+	{
238
+		FILE_LOG(logINFO) << "maximum number of iterations = none";
239
+		Rprintf("maximum number of iterations = none\n");
240
+	} else {
241
+		FILE_LOG(logINFO) << "maximum number of iterations = " << *maxiter;
242
+		Rprintf("maximum number of iterations = %d\n", *maxiter);
243
+	}
244
+	if (*maxtime < 0)
245
+	{
246
+		FILE_LOG(logINFO) << "maximum running time = none";
247
+		Rprintf("maximum running time = none\n");
248
+	} else {
249
+		FILE_LOG(logINFO) << "maximum running time = " << *maxtime << " sec";
250
+		Rprintf("maximum running time = %d sec\n", *maxtime);
251
+	}
252
+	FILE_LOG(logINFO) << "epsilon = " << *eps;
253
+	Rprintf("epsilon = %g\n", *eps);
254
+	FILE_LOG(logINFO) << "number of modifications = " << *Nmod;
255
+	Rprintf("number of modifications = %d\n", *Nmod);
256
+
257
+	// Flush Rprintf statements to console
258
+	R_FlushConsole();
259
+
260
+	// Recode the densities vector to matrix representation
261
+// 	clock_t clocktime = clock(), dtime;
262
+	double** multiD = allocDoubleMatrix(*N, *T);
263
+	for (int iN=0; iN<*N; iN++)
264
+	{
265
+		for (int t=0; t<*T; t++)
266
+		{
267
+			multiD[iN][t] = D[iN*(*T)+t];
268
+		}
269
+	}
270
+// 	dtime = clock() - clocktime;
271
+// 	FILE_LOG(logDEBUG1) << "recoding densities vector to matrix representation: " << dtime << " clicks";
272
+
273
+	// Create the HMM
274
+	FILE_LOG(logDEBUG1) << "Creating the multivariate HMM";
275
+	ScaleHMM* hmm = new ScaleHMM(*T, *N, *Nmod, multiD);
276
+	// Initialize the transition probabilities and proba
277
+	hmm->initialize_transition_probs(initial_A, *use_initial_params);
278
+	hmm->initialize_proba(initial_proba, *use_initial_params);
279
+	
280
+	// Print logproba and A
281
+// 	for (int iN=0; iN<*N; iN++)
282
+// 	{
283
+// 		FILE_LOG(logDEBUG) << "proba["<<iN<<"] = " <<exp(hmm->logproba[iN]);
284
+// 		for (int jN=0; jN<*N; jN++)
285
+// 		{
286
+// 			FILE_LOG(logDEBUG) << "A["<<iN<<"]["<<jN<<"] = " << hmm->A[iN][jN];
287
+// 		}
288
+// 	}
289
+
290
+	// Estimate the parameters
291
+	FILE_LOG(logDEBUG1) << "Starting Baum-Welch estimation";
292
+	try
293
+	{
294
+		hmm->baumWelch(maxiter, maxtime, eps);
295
+	}
296
+	catch (std::exception& e)
297
+	{
298
+		FILE_LOG(logERROR) << "Error in Baum-Welch: " << e.what();
299
+		Rprintf("Error in Baum-Welch: %s\n", e.what());
300
+		if (e.what()=="nan detected") { *error = 1; }
301
+		else { *error = 2; }
302
+	}
303
+	FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
304
+	
305
+// 	// Compute the posteriors and save results directly to the R pointer
306
+// 	FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
307
+// 	for (int iN=0; iN<*N; iN++)
308
+// 	{
309
+// 		for (int t=0; t<*T; t++)
310
+// 		{
311
+// 			posteriors[t + iN * (*T)] = hmm->get_posterior(iN, t);
312
+// 		}
313
+// 	}
314
+
315
+	// Compute the states from posteriors
316
+	FILE_LOG(logDEBUG1) << "Computing states from posteriors";
317
+	double posterior_per_t [*N];
318
+	for (int t=0; t<*T; t++)
319
+	{
320
+		for (int iN=0; iN<*N; iN++)
321
+		{
322
+			posterior_per_t[iN] = hmm->get_posterior(iN, t);
323
+		}
324
+		states[t] = comb_states[argMax(posterior_per_t, *N)];
325
+	}
326
+	
327
+	FILE_LOG(logDEBUG1) << "Return parameters";
328
+	// also return the estimated transition matrix and the initial probs
329
+	for (int i=0; i<*N; i++)
330
+	{
331
+		proba[i] = hmm->get_proba(i);
332
+		for (int j=0; j<*N; j++)
333
+		{
334
+				A[i * (*N) + j] = hmm->get_A(j,i);
335
+		}
336
+	}
337
+	*loglik = hmm->get_logP();
338
+
339
+	FILE_LOG(logDEBUG1) << "Deleting the hmm";
340
+	delete hmm;
341
+	freeDoubleMatrix(multiD, *N);
342
+}
343
+} // extern C
344
+
Browse code

read.cutoff changed, introduced several distributions (not used), windows compatible

chakalakka authored on 20/10/2014 07:10:07
Showing1 changed files
... ...
@@ -1,13 +1,13 @@
1 1
 #include "utility.h"
2 2
 #include "scalehmm.h"
3 3
 #include "loghmm.h"
4
-#include <omp.h> // parallelization options
4
+// #include <omp.h> // parallelization options
5 5
 
6 6
 // ===================================================================================================================================================
7 7
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
8 8
 // ===================================================================================================================================================
9 9
 extern "C" {
10
-void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
10
+void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, double* lambda, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_lambda, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
11 11
 {
12 12
 
13 13
 	// Define logging level
... ...
@@ -16,8 +16,8 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
16 16
  	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
17 17
 //  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
18 18
 
19
-	// Parallelization settings
20
-	omp_set_num_threads(*num_threads);
19
+// 	// Parallelization settings
20
+// 	omp_set_num_threads(*num_threads);
21 21
 
22 22
 	// Print some information
23 23
 	FILE_LOG(logINFO) << "number of states = " << *N;
... ...
@@ -75,43 +75,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
75 75
 	FILE_LOG(logINFO) << "data mean = " << mean << ", data variance = " << variance;		
76 76
 	Rprintf("data mean = %g, data variance = %g\n", mean, variance);		
77 77
 	
78
-	// Go through all states of the hmm and assign the density functions
79
-	// This loop assumes that the negative binomial states come last and are consecutive
80
-	double imean, ivariance;
81
-	for (int i_state=0; i_state<*N; i_state++)
82
-	{
83
-		if (*use_initial_params) {
84
-			FILE_LOG(logINFO) << "Using given parameters for size and prob";
85
-			Rprintf("Using given parameters for size and prob\n");
86
-			imean = (1-initial_prob[i_state])*initial_size[i_state] / initial_prob[i_state];
87
-			ivariance = imean / initial_prob[i_state];
88
-			FILE_LOG(logDEBUG2) << "imean = " << imean;
89
-			FILE_LOG(logDEBUG2) << "ivariance = " << ivariance;
90
-		} else {
91
-
92
-			if (*iniproc == 1)
93
-			{
94
-				// Simple initialization based on data mean, assumed to be the disomic mean
95
-				if (distr_type[i_state] == 1) { }
96
-				else if (distr_type[i_state] == 2) { }
97
-				else if (distr_type[i_state] == 3)
98
-				{
99
-					for (int ii_state=i_state; ii_state<*N; ii_state++)
100
-					{
101
-						imean = mean/2 * (ii_state-i_state+1);
102
-						ivariance = imean * 5;
103
-// 						ivariance = variance/2 * (i_state-1);
104
-						// Calculate r and p from mean and variance
105
-						initial_size[ii_state] = pow(imean,2)/(ivariance-imean);
106
-						initial_prob[ii_state] = imean/ivariance;
107
-					}
108
-					break;
109
-				}
110
-			}
111
-
112
-		}
113
-	}
114
-
78
+	// Create the emission densities and initialize
115 79
 	for (int i_state=0; i_state<*N; i_state++)
116 80
 	{
117 81
 		if (distr_type[i_state] == 1)
... ...
@@ -123,7 +87,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
123 87
 		else if (distr_type[i_state] == 2)
124 88
 		{
125 89
 			FILE_LOG(logDEBUG1) << "Using geometric distribution for state " << i_state;
126
-			Geometric *d = new Geometric(O, *T, 0.9); // delete is done inside ~ScaleHMM()
90
+			Geometric *d = new Geometric(O, *T, initial_prob[i_state]); // delete is done inside ~ScaleHMM()
127 91
 			hmm->densityFunctions.push_back(d);
128 92
 		}
129 93
 		else if (distr_type[i_state] == 3)
... ...
@@ -132,6 +96,18 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
132 96
 			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
133 97
 			hmm->densityFunctions.push_back(d);
134 98
 		}
99
+		else if (distr_type[i_state] == 4)
100
+		{
101
+			FILE_LOG(logDEBUG1) << "Using poisson for state " << i_state;
102
+			Poisson *d = new Poisson(O, *T, initial_lambda[i_state]); // delete is done inside ~ScaleHMM()
103
+			hmm->densityFunctions.push_back(d);
104
+		}
105
+		else if (distr_type[i_state] == 5)
106
+		{
107
+			FILE_LOG(logDEBUG1) << "Using binomial for state " << i_state;
108
+			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
109
+			hmm->densityFunctions.push_back(d);
110
+		}
135 111
 		else
136 112
 		{
137 113
 			FILE_LOG(logWARNING) << "Density not specified, using default negative binomial for state " << i_state;
... ...
@@ -189,7 +165,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
189 165
 		proba[i] = hmm->get_proba(i);
190 166
 		for (int j=0; j<*N; j++)
191 167
 		{
192
-			A[i * (*N) + j] = hmm->get_A(i,j);
168
+			A[i * (*N) + j] = hmm->get_A(j,i);
193 169
 		}
194 170
 	}
195 171
 
... ...
@@ -214,6 +190,17 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
214 190
 			size[i] = 0;
215 191
 			prob[i] = 1;
216 192
 		}
193
+		else if (hmm->densityFunctions[i]->get_name() == POISSON)
194
+		{
195
+			Poisson* d = (Poisson*)(hmm->densityFunctions[i]);
196
+			lambda[i] = d->get_lambda();
197
+		}
198
+		else if (hmm->densityFunctions[i]->get_name() == BINOMIAL) 
199
+		{
200
+			Binomial* d = (Binomial*)(hmm->densityFunctions[i]);
201
+			size[i] = d->get_size();
202
+			prob[i] = d->get_prob();
203
+		}
217 204
 	}
218 205
 	*loglik = hmm->get_logP();
219 206
 	hmm->calc_weights(weights);
Browse code

update_constrained now correct

chakalakka authored on 02/10/2014 09:24:28
Showing1 changed files
... ...
@@ -14,7 +14,7 @@ void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* m
14 14
 // 	FILE* pFile = fopen("chromStar.log", "w");
15 15
 // 	Output2FILE::Stream() = pFile;
16 16
  	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
17
-//  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG1");
17
+//  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
18 18
 
19 19
 	// Parallelization settings
20 20
 	omp_set_num_threads(*num_threads);
Browse code

corrected update, still problems with convergence

chakalakka authored on 01/10/2014 07:49:16
Showing1 changed files
... ...
@@ -7,14 +7,14 @@
7 7
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
8 8
 // ===================================================================================================================================================
9 9
 extern "C" {
10
-void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
10
+void R_univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
11 11
 {
12 12
 
13 13
 	// Define logging level
14 14
 // 	FILE* pFile = fopen("chromStar.log", "w");
15 15
 // 	Output2FILE::Stream() = pFile;
16 16
  	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
17
-//  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
17
+//  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG1");
18 18
 
19 19
 	// Parallelization settings
20 20
 	omp_set_num_threads(*num_threads);
... ...
@@ -76,6 +76,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, do
76 76
 	Rprintf("data mean = %g, data variance = %g\n", mean, variance);		
77 77
 	
78 78
 	// Go through all states of the hmm and assign the density functions
79
+	// This loop assumes that the negative binomial states come last and are consecutive
79 80
 	double imean, ivariance;
80 81
 	for (int i_state=0; i_state<*N; i_state++)
81 82
 	{
... ...
@@ -91,29 +92,41 @@ void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, do
91 92
 			if (*iniproc == 1)
92 93
 			{
93 94
 				// Simple initialization based on data mean, assumed to be the disomic mean
94
-				imean = mean/2 * statelabels[i_state];
95
-				ivariance = variance/2 * statelabels[i_state];
95
+				if (distr_type[i_state] == 1) { }
96
+				else if (distr_type[i_state] == 2) { }
97
+				else if (distr_type[i_state] == 3)
98
+				{
99
+					for (int ii_state=i_state; ii_state<*N; ii_state++)
100
+					{
101
+						imean = mean/2 * (ii_state-i_state+1);
102
+						ivariance = imean * 5;
103
+// 						ivariance = variance/2 * (i_state-1);
104
+						// Calculate r and p from mean and variance
105
+						initial_size[ii_state] = pow(imean,2)/(ivariance-imean);
106
+						initial_prob[ii_state] = imean/ivariance;
107
+					}
108
+					break;
109
+				}
96 110
 			}
97 111
 
98
-			// Calculate r and p from mean and variance
99
-			initial_size[i_state] = pow(imean,2)/(ivariance-imean);
100
-			initial_prob[i_state] = imean/ivariance;
101
-
102 112
 		}
113
+	}
103 114
 
104
-		if (i_state == 0)
115
+	for (int i_state=0; i_state<*N; i_state++)
116
+	{
117
+		if (distr_type[i_state] == 1)
105 118
 		{
106
-			FILE_LOG(logDEBUG1) << "Using only zeros for state " << i_state;
119
+			FILE_LOG(logDEBUG1) << "Using delta distribution for state " << i_state;
107 120
 			ZeroInflation *d = new ZeroInflation(O, *T); // delete is done inside ~ScaleHMM()
108 121
 			hmm->densityFunctions.push_back(d);
109 122
 		}
110
-		else if (i_state == 1)
123
+		else if (distr_type[i_state] == 2)
111 124
 		{
112 125
 			FILE_LOG(logDEBUG1) << "Using geometric distribution for state " << i_state;
113 126
 			Geometric *d = new Geometric(O, *T, 0.9); // delete is done inside ~ScaleHMM()
114 127
 			hmm->densityFunctions.push_back(d);
115 128
 		}
116
-		else if (i_state >= 2)
129
+		else if (distr_type[i_state] == 3)
117 130
 		{
118 131
 			FILE_LOG(logDEBUG1) << "Using negative binomial for state " << i_state;
119 132
 			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
... ...
@@ -166,7 +179,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, do
166 179
 		{
167 180
 			posterior_per_t[iN] = hmm->get_posterior(iN, t);
168 181
 		}
169
-		states[t] = statelabels[argMax(posterior_per_t, *N)];
182
+		states[t] = argMax(posterior_per_t, *N);
170 183
 	}
171 184
 
172 185
 	FILE_LOG(logDEBUG1) << "Return parameters";
Browse code

analysis and plotting improved

chakalakka authored on 19/09/2014 09:42:08
Showing1 changed files
... ...
@@ -1,5 +1,6 @@
1 1
 #include "utility.h"
2 2
 #include "scalehmm.h"
3
+#include "loghmm.h"
3 4
 #include <omp.h> // parallelization options
4 5
 
5 6
 // ===================================================================================================================================================
... ...
@@ -13,7 +14,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, do
13 14
 // 	FILE* pFile = fopen("chromStar.log", "w");
14 15
 // 	Output2FILE::Stream() = pFile;
15 16
  	FILELog::ReportingLevel() = FILELog::FromString("ERROR");
16
-//  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG1");
17
+//  	FILELog::ReportingLevel() = FILELog::FromString("DEBUG2");
17 18
 
18 19
 	// Parallelization settings
19 20
 	omp_set_num_threads(*num_threads);
... ...
@@ -53,6 +54,7 @@ void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, do
53 54
 	// Create the HMM
54 55
 	FILE_LOG(logDEBUG1) << "Creating a univariate HMM";
55 56
 	ScaleHMM* hmm = new ScaleHMM(*T, *N);
57
+// 	LogHMM* hmm = new LogHMM(*T, *N);
56 58
 	hmm->set_cutoff(*read_cutoff);
57 59
 	// Initialize the transition probabilities and proba
58 60
 	hmm->initialize_transition_probs(initial_A, *use_initial_params);
... ...
@@ -99,16 +101,22 @@ void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, do
99 101
 
100 102
 		}
101 103
 
102
-		if (i_state >= 1)
104
+		if (i_state == 0)
103 105
 		{
104
-			FILE_LOG(logDEBUG1) << "Using negative binomial for state " << i_state;
105
-			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
106
+			FILE_LOG(logDEBUG1) << "Using only zeros for state " << i_state;
107
+			ZeroInflation *d = new ZeroInflation(O, *T); // delete is done inside ~ScaleHMM()
106 108
 			hmm->densityFunctions.push_back(d);
107 109
 		}
108
-		else if (i_state == 0)
110
+		else if (i_state == 1)
109 111
 		{
110
-			FILE_LOG(logDEBUG1) << "Using only zeros for state " << i_state;
111
-			ZeroInflation *d = new ZeroInflation(O, *T); // delete is done inside ~ScaleHMM()
112
+			FILE_LOG(logDEBUG1) << "Using geometric distribution for state " << i_state;
113
+			Geometric *d = new Geometric(O, *T, 0.9); // delete is done inside ~ScaleHMM()
114
+			hmm->densityFunctions.push_back(d);
115
+		}
116
+		else if (i_state >= 2)
117
+		{
118
+			FILE_LOG(logDEBUG1) << "Using negative binomial for state " << i_state;
119
+			NegativeBinomial *d = new NegativeBinomial(O, *T, initial_size[i_state], initial_prob[i_state]); // delete is done inside ~ScaleHMM()
112 120
 			hmm->densityFunctions.push_back(d);
113 121
 		}
114 122
 		else
... ...
@@ -181,6 +189,12 @@ void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, do
181 189
 			size[i] = d->get_size();
182 190
 			prob[i] = d->get_prob();
183 191
 		}
192
+		else if (hmm->densityFunctions[i]->get_name() == GEOMETRIC)
193
+		{
194
+			Geometric* d = (Geometric*)(hmm->densityFunctions[i]);
195
+			size[i] = 0;
196
+			prob[i] = d->get_prob();
197
+		}
184 198
 		else if (hmm->densityFunctions[i]->get_name() == ZERO_INFLATION)
185 199
 		{
186 200
 			// These values for a Negative Binomial define a zero-inflation (delta distribution)
Browse code

binned.data is now GRanges, reduced memory consumption, posteriors are not returned

chakalakka authored on 09/07/2014 15:24:19
Showing1 changed files
... ...
@@ -6,7 +6,7 @@
6 6
 // This function takes parameters from R, creates a univariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
7 7
 // ===================================================================================================================================================
8 8
 extern "C" {
9
-void R_univariate_hmm(int* O, int* T, int* N, int* states, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* posteriors, double* A, double* proba, double* loglik, double* weights, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
9
+void R_univariate_hmm(int* O, int* T, int* N, int* statelabels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* weights, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff)
10 10
 {
11 11
 
12 12
 	// Define logging level
... ...
@@ -89,8 +89,8 @@ void R_univariate_hmm(int* O, int* T, int* N, int* states, double* size, double*
89 89
 			if (*iniproc == 1)
90 90
 			{
91 91
 				// Simple initialization based on data mean, assumed to be the disomic mean
92
-				imean = mean/2 * states[i_state];
93
-				ivariance = variance/2 * states[i_state];
92
+				imean = mean/2 * statelabels[i_state];
93
+				ivariance = variance/2 * statelabels[i_state];
94 94
 			}
95 95
 
96 96
 			// Calculate r and p from mean and variance
... ...
@@ -137,15 +137,28 @@ void R_univariate_hmm(int* O, int* T, int* N, int* states, double* size, double*
137 137
 	}
138 138
 		
139 139
 	FILE_LOG(logDEBUG1) << "Finished with Baum-Welch estimation";
140
-	// Compute the posteriors and save results directly to the R pointer
141
-	FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
142
-	#pragma omp parallel for
143
-	for (int iN=0; iN<*N; iN++)
140
+
141
+// 	// Compute the posteriors and save results directly to the R pointer
142
+// 	FILE_LOG(logDEBUG1) << "Recode posteriors into column representation";
143
+// 	#pragma omp parallel for
144