Browse code

added support for up to 53 experiments in multivariate

chakalakka authored on 09/06/2016 13:55:17
Showing 7 changed files

... ...
@@ -269,6 +269,20 @@ Chromstar <- function(inputfolder, experiment.table, outputfolder, configfile=NU
269 269
     } else {
270 270
         combined.model <- loadHmmsFromFiles(savename, check.class=class.combined.multivariate.hmm)[[1]]
271 271
     }
272
+    ## Plot correlations
273
+    ptm <- startTimedMessage("Plotting read count correlation ...")
274
+    char.per.cm <- 10
275
+    legend.cm <- 3
276
+    savename <- file.path(plotpath, 'read-count-correlation.pdf')
277
+    ggplt <- plotMultivariateCorrelation(combined.model, cluster=FALSE)
278
+    width <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm + legend.cm
279
+    height <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm
280
+    ggsave(savename, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm')
281
+    savename <- file.path(plotpath, 'read-count-correlation-clustered.pdf')
282
+    ggplt <- plotMultivariateCorrelation(combined.model, cluster=TRUE)
283
+    width <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm + legend.cm
284
+    height <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm
285
+    ggsave(savename, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm')
272 286
   
273 287
     #-------------------------
274 288
     ## Export browser files ##
... ...
@@ -297,17 +311,6 @@ Chromstar <- function(inputfolder, experiment.table, outputfolder, configfile=NU
297 311
         ptm <- startTimedMessage("Making plots ...")
298 312
         char.per.cm <- 10
299 313
         legend.cm <- 3
300
-        savename1 <- paste0(savename, '_correlation.pdf')
301
-        ggplt <- suppressMessages( graphics::plot(multimodel, type='correlation', cluster=FALSE) )
302
-        width <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm + legend.cm
303
-        height <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm
304
-        ggsave(savename1, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm')
305
-
306
-        savename1.1 <- paste0(savename, '_correlation-clustered.pdf')
307
-        ggplt <- suppressMessages( graphics::plot(multimodel, type='correlation', cluster=TRUE) )
308
-        width <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm + legend.cm
309
-        height <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm
310
-        ggsave(savename1.1, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm')
311 314
 
312 315
         savename2 <- paste0(savename, '_transitionMatrix.pdf')
313 316
         ggplt <- suppressMessages( graphics::plot(multimodel, type='transitionMatrix') )
... ...
@@ -232,7 +232,7 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions,
232 232
         num.bins = as.integer(length(bins)), # int* T
233 233
         max.states = as.integer(length(comb.states)), # int* N
234 234
         num.modifications = as.integer(ncol(bins$counts)), # int* Nmod
235
-        comb.states = as.integer(comb.states), # int* comb_states
235
+        comb.states = as.numeric(comb.states), # double* comb_states
236 236
         size = as.double(rs), # double* size
237 237
         prob = as.double(ps), # double* prob
238 238
         w = as.double(ws), # double* w
... ...
@@ -359,6 +359,9 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions,
359 359
 prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosomes=NULL) {
360 360
 
361 361
     nummod <- length(hmms)
362
+    if (nummod > 53) {
363
+        stop("We can't handle more than 53 'hmms' Please decrease the number of input 'hmms'.")
364
+    }
362 365
 
363 366
     ## Load first HMM for coordinates
364 367
     ptm <- startTimedMessage("Getting coordinates ...")
... ...
@@ -410,7 +413,7 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
410 413
     remove(hmm)
411 414
 
412 415
     ## Transform binary to decimal
413
-    ptm <- startTimedMessage("Getting combinatorial states")
416
+    ptm <- startTimedMessage("Getting combinatorial states ...")
414 417
     decimal_states <- rep(0,length(bins))
415 418
     for (imod in 1:nummod) {
416 419
         decimal_states <- decimal_states + 2^(nummod-imod) * bins$binary_statesmatrix[,imod]
... ...
@@ -431,7 +434,7 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
431 434
     stopTimedMessage(ptm)
432 435
 
433 436
     ## We pre-compute the z-values for each number of counts
434
-    ptm <- startTimedMessage("Computing pre z-matrix...")
437
+    ptm <- startTimedMessage("Computing pre z-matrix ...")
435 438
     z.per.read <- array(NA, dim=c(maxcounts+1, nummod, 2))
436 439
     xcounts = 0:maxcounts
437 440
     for (imod in 1:nummod) {
... ...
@@ -463,7 +466,7 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
463 466
     stopTimedMessage(ptm)
464 467
 
465 468
     ## Compute the z matrix
466
-    ptm <- startTimedMessage("Transfering values into z-matrix...")
469
+    ptm <- startTimedMessage("Transfering values into z-matrix ...")
467 470
     z.per.bin = array(NA, dim=c(length(bins), nummod, 2), dimnames=list(bin=1:length(bins), track=info$ID, state.labels[2:3]))
468 471
     for (imod in 1:nummod) {
469 472
         for (i1 in 1:2) {
... ...
@@ -476,18 +479,30 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom
476 479
     stopTimedMessage(ptm)
477 480
 
478 481
     ### Calculate correlation matrix
479
-    ptm <- startTimedMessage("Computing inverse of correlation matrix...")
482
+    ptm <- startTimedMessage("Computing inverse of correlation matrix ...")
480 483
     correlationMatrix = array(NA, dim=c(nummod,nummod,length(comb.states)), dimnames=list(track=info$ID, track=info$ID, comb.state=comb.states))
481 484
     correlationMatrixInverse = array(NA, dim=c(nummod,nummod,length(comb.states)), dimnames=list(track=info$ID, track=info$ID, comb.state=comb.states))
482 485
     determinant = rep(NA, length(comb.states))
483 486
     names(determinant) <- comb.states
484 487
 
488
+    numericToBin <- function(x, num.digits) {
489
+        bin <- logical(length=num.digits)
490
+        xi <- x
491
+        i1 <- num.digits
492
+        while (xi > 0) {
493
+            bin[i1] <- xi %% 2
494
+            xi <- xi %/% 2
495
+            i1 <- i1 - 1
496
+        }
497
+        return(bin)
498
+    }
499
+    
485 500
     ## Calculate correlation matrix serial
486 501
     for (state in comb.states) {
487 502
         istate = which(comb.states==state)
488
-        mask = which(bins$state==as.integer(state))
503
+        mask = which(bins$state==as.numeric(state))
489 504
         # Convert state to binary representation
490
-        binary_state = rev(as.integer(intToBits(as.integer(state)))[1:nummod])
505
+        binary_state <- numericToBin(as.numeric(state), nummod)
491 506
         # Subselect z
492 507
         z.temp <- matrix(NA, ncol=nummod, nrow=length(mask))
493 508
         for (i1 in 1:length(binary_state)) {
... ...
@@ -79,11 +79,13 @@ combineMultivariates <- function(hmms, mode) {
79 79
         hmm <- suppressMessages( loadHmmsFromFiles(hmms[[1]], check.class=class.multivariate.hmm)[[1]] )
80 80
         bins <- hmm$bins
81 81
         mcols(bins) <- NULL
82
-        ## Add combinatorial states and posteriors
82
+        ## Add combinatorial states, counts and posteriors
83 83
         infos <- list()
84 84
         infos[[1]] <- hmm$info
85 85
         combs <- list()
86 86
         combs[[1]] <- hmm$bins$combination
87
+        counts <- list()
88
+        counts[[1]] <- hmm$bins$counts
87 89
         posteriors <- list()
88 90
         posteriors[[1]] <- hmm$bins$posteriors
89 91
         binstates <- list()
... ...
@@ -96,11 +98,13 @@ combineMultivariates <- function(hmms, mode) {
96 98
                 hmm <- suppressMessages( loadHmmsFromFiles(hmms[[i1]], check.class=class.multivariate.hmm)[[1]] )
97 99
                 infos[[i1]] <- hmm$info
98 100
                 combs[[i1]] <- hmm$bins$combination
101
+                counts[[i1]] <- hmm$bins$counts
99 102
                 posteriors[[i1]] <- hmm$bins$posteriors
100 103
                 binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
101 104
                 stopTimedMessage(ptm)
102 105
             }
103 106
         }
107
+        counts <- do.call(cbind, counts)
104 108
         posteriors <- do.call(cbind, posteriors)
105 109
         infos <- do.call(rbind, infos)
106 110
         conditions <- unique(infos$condition)
... ...
@@ -112,16 +116,19 @@ combineMultivariates <- function(hmms, mode) {
112 116
     } else if (mode == 'condition') {
113 117
         ### Get posteriors and binary states
114 118
         infos <- list()
119
+        counts <- list()
115 120
         posteriors <- list()
116 121
         binstates <- list()
117 122
         for (i1 in 1:length(hmms)) {
118 123
             ptm <- startTimedMessage("Processing HMM ",i1," ...")
119 124
             hmm <- suppressMessages( loadHmmsFromFiles(hmms[[i1]], check.class=class.multivariate.hmm)[[1]] )
120 125
             infos[[i1]] <- hmm$info
126
+            counts[[i1]] <- hmm$bins$counts
121 127
             posteriors[[i1]] <- hmm$bins$posteriors
122 128
             binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
123 129
             stopTimedMessage(ptm)
124 130
         }
131
+        counts <- do.call(cbind, counts)
125 132
         posteriors <- do.call(cbind, posteriors)
126 133
         infos <- do.call(rbind, infos)
127 134
         conditions <- unique(infos$condition)
... ...
@@ -157,6 +164,7 @@ combineMultivariates <- function(hmms, mode) {
157 164
         mcols(bins) <- NULL
158 165
         infos <- hmm$info
159 166
         conditions <- unique(infos$condition)
167
+        counts <- hmm$bins$counts
160 168
         posteriors <- hmm$bins$posteriors
161 169
         states <- hmm$bins$state
162 170
         combs <- list()
... ...
@@ -177,9 +185,11 @@ combineMultivariates <- function(hmms, mode) {
177 185
         hmm <- suppressMessages( loadHmmsFromFiles(hmms[[1]], check.class=class.multivariate.hmm)[[1]] )
178 186
         bins <- hmm$bins
179 187
         mcols(bins) <- NULL
180
-        ## Add combinatorial states and posteriors
188
+        ## Add combinatorial states, counts and posteriors
181 189
         infos <- list()
182 190
         infos[[1]] <- hmm$info
191
+        counts <- list()
192
+        counts[[1]] <- hmm$bins$counts
183 193
         posteriors <- list()
184 194
         posteriors[[1]] <- hmm$bins$posteriors
185 195
         binstates <- list()
... ...
@@ -191,11 +201,13 @@ combineMultivariates <- function(hmms, mode) {
191 201
                 ptm <- startTimedMessage("Processing mark-condition ",i1," ...")
192 202
                 hmm <- suppressMessages( loadHmmsFromFiles(hmms[[i1]], check.class=class.multivariate.hmm)[[1]] )
193 203
                 infos[[i1]] <- hmm$info
204
+                counts[[i1]] <- hmm$bins$counts
194 205
                 posteriors[[i1]] <- hmm$bins$posteriors
195 206
                 binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID)
196 207
                 stopTimedMessage(ptm)
197 208
             }
198 209
         }
210
+        counts <- do.call(cbind, counts)
199 211
         posteriors <- do.call(cbind, posteriors)
200 212
         binstates <- do.call(cbind, binstates)
201 213
         infos <- do.call(rbind, infos)
... ...
@@ -230,7 +242,8 @@ combineMultivariates <- function(hmms, mode) {
230 242
     bins$state <- states
231 243
     mcols(bins)[names(combs.df)] <- combs.df
232 244
     
233
-    ## Transferring posteriors
245
+    ## Transferring counts and posteriors
246
+    bins$counts <- counts
234 247
     bins$posteriors <- posteriors
235 248
 
236 249
     ## Add differential score ##
... ...
@@ -239,7 +252,7 @@ combineMultivariates <- function(hmms, mode) {
239 252
     ### Redo the segmentation for all conditions combined
240 253
     ptm <- startTimedMessage("Redoing segmentation for all conditions combined ...")
241 254
     segments <- suppressMessages( multivariateSegmentation(bins, column2collapseBy='state') )
242
-    names(mcols(segments)) <- setdiff(names(mcols(bins)), 'posteriors')
255
+    names(mcols(segments)) <- setdiff(names(mcols(bins)), c('posteriors','counts'))
243 256
     stopTimedMessage(ptm)
244 257
     
245 258
     ### Redo the segmentation for each condition separately
... ...
@@ -51,7 +51,7 @@ unis2pseudomulti <- function(uni.hmm.list) {
51 51
     weights = lapply(uni.hmm.list,"[[","weights")
52 52
 
53 53
     # Extract the reads
54
-    ptm <- startTimedMessage("Extracting read counts from uni.hmm.list...")
54
+    ptm <- startTimedMessage("Extracting read counts from uni.hmm.list ...")
55 55
     reads = matrix(NA, ncol=nummod, nrow=numbins)
56 56
     colnames(reads) <- info$ID
57 57
     for (imod in 1:nummod) {
... ...
@@ -62,7 +62,7 @@ unis2pseudomulti <- function(uni.hmm.list) {
62 62
     stopTimedMessage(ptm)
63 63
 
64 64
     ## Get combinatorial states
65
-    ptm <- startTimedMessage("Getting combinatorial states")
65
+    ptm <- startTimedMessage("Getting combinatorial states ...")
66 66
     combstates.per.bin = combinatorialStates(uni.hmm.list)
67 67
     comb.states.table = table(combstates.per.bin)
68 68
     comb.states = as.numeric(names(sort(comb.states.table, decreasing=TRUE)))
... ...
@@ -84,7 +84,7 @@ unis2pseudomulti <- function(uni.hmm.list) {
84 84
     stopTimedMessage(ptm)
85 85
     
86 86
     ## Calculate transition matrix
87
-    ptm <- startTimedMessage("Estimating transition matrix...")
87
+    ptm <- startTimedMessage("Estimating transition matrix ...")
88 88
     A.estimated = matrix(0, ncol=2^nummod, nrow=2^nummod)
89 89
     colnames(A.estimated) = 1:2^nummod-1
90 90
     rownames(A.estimated) = 1:2^nummod-1
... ...
@@ -258,7 +258,7 @@ void univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* max
258 258
 // =====================================================================================================================================================
259 259
 // This function takes parameters from R, creates a multivariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R.
260 260
 // =====================================================================================================================================================
261
-void multivariate_hmm(int* O, int* T, int* N, int *Nmod, int* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity)
261
+void multivariate_hmm(int* O, int* T, int* N, int *Nmod, double* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity)
262 262
 {
263 263
 
264 264
 	// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"}
... ...
@@ -334,14 +334,15 @@ void multivariate_hmm(int* O, int* T, int* N, int *Nmod, int* comb_states, doubl
334 334
 
335 335
 	// Prepare the binary_states (univariate) vector: binary_states[N][Nmod], e.g., binary_states[iN][imod] tells me at state comb_states[iN], modification imod is non-enriched (0) or enriched (1)
336 336
 	//FILE_LOG(logDEBUG1) << "Preparing the binary_states vector";
337
+	double res;
337 338
 	bool **binary_states = CallocBoolMatrix(*N, *Nmod);
338
-	for(int iN=0; iN < *N; iN++) //for each comb state considered
339
+	for (int iN=0; iN < *N; iN++) //for each comb state considered
339 340
 	{
340
-		for(int imod=0; imod < *Nmod; imod++) //for each modification of this comb state
341
+		res = comb_states[iN];
342
+		for (int imod=(*Nmod-1); imod >= 0; imod--) //for each modification of this comb state
341 343
 		{
342
-			binary_states[iN][imod] = comb_states[iN]&(int)pow(2,*Nmod-imod-1);//if =0->hidden state comb_states[iN] has modification imod non enriched; if !=0->enriched
343
-			if (binary_states[iN][imod] != 0 )
344
-				binary_states[iN][imod] = 1;
344
+			binary_states[iN][imod] = (bool)fmod(res,2);
345
+			res = (res - (double)binary_states[iN][imod]) / 2.0;
345 346
 		}
346 347
 	}
347 348
 
... ...
@@ -20,7 +20,7 @@ extern "C"
20 20
 void univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* posteriors, double* densities, bool* keep_densities, double* A, double* proba, double* loglik, double* weights, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* verbosity);
21 21
 
22 22
 extern "C"
23
-void multivariate_hmm(int* O, int* T, int* N, int *Nmod, int* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity);
23
+void multivariate_hmm(int* O, int* T, int* N, int *Nmod, double* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity);
24 24
 
25 25
 extern "C"
26 26
 void univariate_cleanup();
... ...
@@ -4,7 +4,7 @@
4 4
 
5 5
 
6 6
 R_NativePrimitiveArgType arg1[] = {INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, LGLSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP};
7
-R_NativePrimitiveArgType arg2[] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, LGLSXP, REALSXP, LGLSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP};
7
+R_NativePrimitiveArgType arg2[] = {INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, LGLSXP, REALSXP, LGLSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP};
8 8
 R_NativePrimitiveArgType arg4[] = {INTSXP};
9 9
 
10 10
 static const R_CMethodDef CEntries[]  = {