... | ... |
@@ -269,6 +269,20 @@ Chromstar <- function(inputfolder, experiment.table, outputfolder, configfile=NU |
269 | 269 |
} else { |
270 | 270 |
combined.model <- loadHmmsFromFiles(savename, check.class=class.combined.multivariate.hmm)[[1]] |
271 | 271 |
} |
272 |
+ ## Plot correlations |
|
273 |
+ ptm <- startTimedMessage("Plotting read count correlation ...") |
|
274 |
+ char.per.cm <- 10 |
|
275 |
+ legend.cm <- 3 |
|
276 |
+ savename <- file.path(plotpath, 'read-count-correlation.pdf') |
|
277 |
+ ggplt <- plotMultivariateCorrelation(combined.model, cluster=FALSE) |
|
278 |
+ width <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm + legend.cm |
|
279 |
+ height <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm |
|
280 |
+ ggsave(savename, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm') |
|
281 |
+ savename <- file.path(plotpath, 'read-count-correlation-clustered.pdf') |
|
282 |
+ ggplt <- plotMultivariateCorrelation(combined.model, cluster=TRUE) |
|
283 |
+ width <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm + legend.cm |
|
284 |
+ height <- length(combined.model$info$ID) + max(sapply(combined.model$info$ID, nchar)) / char.per.cm |
|
285 |
+ ggsave(savename, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm') |
|
272 | 286 |
|
273 | 287 |
#------------------------- |
274 | 288 |
## Export browser files ## |
... | ... |
@@ -297,17 +311,6 @@ Chromstar <- function(inputfolder, experiment.table, outputfolder, configfile=NU |
297 | 311 |
ptm <- startTimedMessage("Making plots ...") |
298 | 312 |
char.per.cm <- 10 |
299 | 313 |
legend.cm <- 3 |
300 |
- savename1 <- paste0(savename, '_correlation.pdf') |
|
301 |
- ggplt <- suppressMessages( graphics::plot(multimodel, type='correlation', cluster=FALSE) ) |
|
302 |
- width <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm + legend.cm |
|
303 |
- height <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm |
|
304 |
- ggsave(savename1, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm') |
|
305 |
- |
|
306 |
- savename1.1 <- paste0(savename, '_correlation-clustered.pdf') |
|
307 |
- ggplt <- suppressMessages( graphics::plot(multimodel, type='correlation', cluster=TRUE) ) |
|
308 |
- width <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm + legend.cm |
|
309 |
- height <- length(multimodel$info$ID) + max(sapply(multimodel$info$ID, nchar)) / char.per.cm |
|
310 |
- ggsave(savename1.1, plot=ggplt, width=width, height=height, limitsize=FALSE, units='cm') |
|
311 | 314 |
|
312 | 315 |
savename2 <- paste0(savename, '_transitionMatrix.pdf') |
313 | 316 |
ggplt <- suppressMessages( graphics::plot(multimodel, type='transitionMatrix') ) |
... | ... |
@@ -232,7 +232,7 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
232 | 232 |
num.bins = as.integer(length(bins)), # int* T |
233 | 233 |
max.states = as.integer(length(comb.states)), # int* N |
234 | 234 |
num.modifications = as.integer(ncol(bins$counts)), # int* Nmod |
235 |
- comb.states = as.integer(comb.states), # int* comb_states |
|
235 |
+ comb.states = as.numeric(comb.states), # double* comb_states |
|
236 | 236 |
size = as.double(rs), # double* size |
237 | 237 |
prob = as.double(ps), # double* prob |
238 | 238 |
w = as.double(ws), # double* w |
... | ... |
@@ -359,6 +359,9 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
359 | 359 |
prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosomes=NULL) { |
360 | 360 |
|
361 | 361 |
nummod <- length(hmms) |
362 |
+ if (nummod > 53) { |
|
363 |
+ stop("We can't handle more than 53 'hmms' Please decrease the number of input 'hmms'.") |
|
364 |
+ } |
|
362 | 365 |
|
363 | 366 |
## Load first HMM for coordinates |
364 | 367 |
ptm <- startTimedMessage("Getting coordinates ...") |
... | ... |
@@ -410,7 +413,7 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom |
410 | 413 |
remove(hmm) |
411 | 414 |
|
412 | 415 |
## Transform binary to decimal |
413 |
- ptm <- startTimedMessage("Getting combinatorial states") |
|
416 |
+ ptm <- startTimedMessage("Getting combinatorial states ...") |
|
414 | 417 |
decimal_states <- rep(0,length(bins)) |
415 | 418 |
for (imod in 1:nummod) { |
416 | 419 |
decimal_states <- decimal_states + 2^(nummod-imod) * bins$binary_statesmatrix[,imod] |
... | ... |
@@ -431,7 +434,7 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom |
431 | 434 |
stopTimedMessage(ptm) |
432 | 435 |
|
433 | 436 |
## We pre-compute the z-values for each number of counts |
434 |
- ptm <- startTimedMessage("Computing pre z-matrix...") |
|
437 |
+ ptm <- startTimedMessage("Computing pre z-matrix ...") |
|
435 | 438 |
z.per.read <- array(NA, dim=c(maxcounts+1, nummod, 2)) |
436 | 439 |
xcounts = 0:maxcounts |
437 | 440 |
for (imod in 1:nummod) { |
... | ... |
@@ -463,7 +466,7 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom |
463 | 466 |
stopTimedMessage(ptm) |
464 | 467 |
|
465 | 468 |
## Compute the z matrix |
466 |
- ptm <- startTimedMessage("Transfering values into z-matrix...") |
|
469 |
+ ptm <- startTimedMessage("Transfering values into z-matrix ...") |
|
467 | 470 |
z.per.bin = array(NA, dim=c(length(bins), nummod, 2), dimnames=list(bin=1:length(bins), track=info$ID, state.labels[2:3])) |
468 | 471 |
for (imod in 1:nummod) { |
469 | 472 |
for (i1 in 1:2) { |
... | ... |
@@ -476,18 +479,30 @@ prepareMultivariate = function(hmms, use.states=NULL, max.states=NULL, chromosom |
476 | 479 |
stopTimedMessage(ptm) |
477 | 480 |
|
478 | 481 |
### Calculate correlation matrix |
479 |
- ptm <- startTimedMessage("Computing inverse of correlation matrix...") |
|
482 |
+ ptm <- startTimedMessage("Computing inverse of correlation matrix ...") |
|
480 | 483 |
correlationMatrix = array(NA, dim=c(nummod,nummod,length(comb.states)), dimnames=list(track=info$ID, track=info$ID, comb.state=comb.states)) |
481 | 484 |
correlationMatrixInverse = array(NA, dim=c(nummod,nummod,length(comb.states)), dimnames=list(track=info$ID, track=info$ID, comb.state=comb.states)) |
482 | 485 |
determinant = rep(NA, length(comb.states)) |
483 | 486 |
names(determinant) <- comb.states |
484 | 487 |
|
488 |
+ numericToBin <- function(x, num.digits) { |
|
489 |
+ bin <- logical(length=num.digits) |
|
490 |
+ xi <- x |
|
491 |
+ i1 <- num.digits |
|
492 |
+ while (xi > 0) { |
|
493 |
+ bin[i1] <- xi %% 2 |
|
494 |
+ xi <- xi %/% 2 |
|
495 |
+ i1 <- i1 - 1 |
|
496 |
+ } |
|
497 |
+ return(bin) |
|
498 |
+ } |
|
499 |
+ |
|
485 | 500 |
## Calculate correlation matrix serial |
486 | 501 |
for (state in comb.states) { |
487 | 502 |
istate = which(comb.states==state) |
488 |
- mask = which(bins$state==as.integer(state)) |
|
503 |
+ mask = which(bins$state==as.numeric(state)) |
|
489 | 504 |
# Convert state to binary representation |
490 |
- binary_state = rev(as.integer(intToBits(as.integer(state)))[1:nummod]) |
|
505 |
+ binary_state <- numericToBin(as.numeric(state), nummod) |
|
491 | 506 |
# Subselect z |
492 | 507 |
z.temp <- matrix(NA, ncol=nummod, nrow=length(mask)) |
493 | 508 |
for (i1 in 1:length(binary_state)) { |
... | ... |
@@ -79,11 +79,13 @@ combineMultivariates <- function(hmms, mode) { |
79 | 79 |
hmm <- suppressMessages( loadHmmsFromFiles(hmms[[1]], check.class=class.multivariate.hmm)[[1]] ) |
80 | 80 |
bins <- hmm$bins |
81 | 81 |
mcols(bins) <- NULL |
82 |
- ## Add combinatorial states and posteriors |
|
82 |
+ ## Add combinatorial states, counts and posteriors |
|
83 | 83 |
infos <- list() |
84 | 84 |
infos[[1]] <- hmm$info |
85 | 85 |
combs <- list() |
86 | 86 |
combs[[1]] <- hmm$bins$combination |
87 |
+ counts <- list() |
|
88 |
+ counts[[1]] <- hmm$bins$counts |
|
87 | 89 |
posteriors <- list() |
88 | 90 |
posteriors[[1]] <- hmm$bins$posteriors |
89 | 91 |
binstates <- list() |
... | ... |
@@ -96,11 +98,13 @@ combineMultivariates <- function(hmms, mode) { |
96 | 98 |
hmm <- suppressMessages( loadHmmsFromFiles(hmms[[i1]], check.class=class.multivariate.hmm)[[1]] ) |
97 | 99 |
infos[[i1]] <- hmm$info |
98 | 100 |
combs[[i1]] <- hmm$bins$combination |
101 |
+ counts[[i1]] <- hmm$bins$counts |
|
99 | 102 |
posteriors[[i1]] <- hmm$bins$posteriors |
100 | 103 |
binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
101 | 104 |
stopTimedMessage(ptm) |
102 | 105 |
} |
103 | 106 |
} |
107 |
+ counts <- do.call(cbind, counts) |
|
104 | 108 |
posteriors <- do.call(cbind, posteriors) |
105 | 109 |
infos <- do.call(rbind, infos) |
106 | 110 |
conditions <- unique(infos$condition) |
... | ... |
@@ -112,16 +116,19 @@ combineMultivariates <- function(hmms, mode) { |
112 | 116 |
} else if (mode == 'condition') { |
113 | 117 |
### Get posteriors and binary states |
114 | 118 |
infos <- list() |
119 |
+ counts <- list() |
|
115 | 120 |
posteriors <- list() |
116 | 121 |
binstates <- list() |
117 | 122 |
for (i1 in 1:length(hmms)) { |
118 | 123 |
ptm <- startTimedMessage("Processing HMM ",i1," ...") |
119 | 124 |
hmm <- suppressMessages( loadHmmsFromFiles(hmms[[i1]], check.class=class.multivariate.hmm)[[1]] ) |
120 | 125 |
infos[[i1]] <- hmm$info |
126 |
+ counts[[i1]] <- hmm$bins$counts |
|
121 | 127 |
posteriors[[i1]] <- hmm$bins$posteriors |
122 | 128 |
binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
123 | 129 |
stopTimedMessage(ptm) |
124 | 130 |
} |
131 |
+ counts <- do.call(cbind, counts) |
|
125 | 132 |
posteriors <- do.call(cbind, posteriors) |
126 | 133 |
infos <- do.call(rbind, infos) |
127 | 134 |
conditions <- unique(infos$condition) |
... | ... |
@@ -157,6 +164,7 @@ combineMultivariates <- function(hmms, mode) { |
157 | 164 |
mcols(bins) <- NULL |
158 | 165 |
infos <- hmm$info |
159 | 166 |
conditions <- unique(infos$condition) |
167 |
+ counts <- hmm$bins$counts |
|
160 | 168 |
posteriors <- hmm$bins$posteriors |
161 | 169 |
states <- hmm$bins$state |
162 | 170 |
combs <- list() |
... | ... |
@@ -177,9 +185,11 @@ combineMultivariates <- function(hmms, mode) { |
177 | 185 |
hmm <- suppressMessages( loadHmmsFromFiles(hmms[[1]], check.class=class.multivariate.hmm)[[1]] ) |
178 | 186 |
bins <- hmm$bins |
179 | 187 |
mcols(bins) <- NULL |
180 |
- ## Add combinatorial states and posteriors |
|
188 |
+ ## Add combinatorial states, counts and posteriors |
|
181 | 189 |
infos <- list() |
182 | 190 |
infos[[1]] <- hmm$info |
191 |
+ counts <- list() |
|
192 |
+ counts[[1]] <- hmm$bins$counts |
|
183 | 193 |
posteriors <- list() |
184 | 194 |
posteriors[[1]] <- hmm$bins$posteriors |
185 | 195 |
binstates <- list() |
... | ... |
@@ -191,11 +201,13 @@ combineMultivariates <- function(hmms, mode) { |
191 | 201 |
ptm <- startTimedMessage("Processing mark-condition ",i1," ...") |
192 | 202 |
hmm <- suppressMessages( loadHmmsFromFiles(hmms[[i1]], check.class=class.multivariate.hmm)[[1]] ) |
193 | 203 |
infos[[i1]] <- hmm$info |
204 |
+ counts[[i1]] <- hmm$bins$counts |
|
194 | 205 |
posteriors[[i1]] <- hmm$bins$posteriors |
195 | 206 |
binstates[[i1]] <- dec2bin(hmm$bins$state, colnames=hmm$info$ID) |
196 | 207 |
stopTimedMessage(ptm) |
197 | 208 |
} |
198 | 209 |
} |
210 |
+ counts <- do.call(cbind, counts) |
|
199 | 211 |
posteriors <- do.call(cbind, posteriors) |
200 | 212 |
binstates <- do.call(cbind, binstates) |
201 | 213 |
infos <- do.call(rbind, infos) |
... | ... |
@@ -230,7 +242,8 @@ combineMultivariates <- function(hmms, mode) { |
230 | 242 |
bins$state <- states |
231 | 243 |
mcols(bins)[names(combs.df)] <- combs.df |
232 | 244 |
|
233 |
- ## Transferring posteriors |
|
245 |
+ ## Transferring counts and posteriors |
|
246 |
+ bins$counts <- counts |
|
234 | 247 |
bins$posteriors <- posteriors |
235 | 248 |
|
236 | 249 |
## Add differential score ## |
... | ... |
@@ -239,7 +252,7 @@ combineMultivariates <- function(hmms, mode) { |
239 | 252 |
### Redo the segmentation for all conditions combined |
240 | 253 |
ptm <- startTimedMessage("Redoing segmentation for all conditions combined ...") |
241 | 254 |
segments <- suppressMessages( multivariateSegmentation(bins, column2collapseBy='state') ) |
242 |
- names(mcols(segments)) <- setdiff(names(mcols(bins)), 'posteriors') |
|
255 |
+ names(mcols(segments)) <- setdiff(names(mcols(bins)), c('posteriors','counts')) |
|
243 | 256 |
stopTimedMessage(ptm) |
244 | 257 |
|
245 | 258 |
### Redo the segmentation for each condition separately |
... | ... |
@@ -51,7 +51,7 @@ unis2pseudomulti <- function(uni.hmm.list) { |
51 | 51 |
weights = lapply(uni.hmm.list,"[[","weights") |
52 | 52 |
|
53 | 53 |
# Extract the reads |
54 |
- ptm <- startTimedMessage("Extracting read counts from uni.hmm.list...") |
|
54 |
+ ptm <- startTimedMessage("Extracting read counts from uni.hmm.list ...") |
|
55 | 55 |
reads = matrix(NA, ncol=nummod, nrow=numbins) |
56 | 56 |
colnames(reads) <- info$ID |
57 | 57 |
for (imod in 1:nummod) { |
... | ... |
@@ -62,7 +62,7 @@ unis2pseudomulti <- function(uni.hmm.list) { |
62 | 62 |
stopTimedMessage(ptm) |
63 | 63 |
|
64 | 64 |
## Get combinatorial states |
65 |
- ptm <- startTimedMessage("Getting combinatorial states") |
|
65 |
+ ptm <- startTimedMessage("Getting combinatorial states ...") |
|
66 | 66 |
combstates.per.bin = combinatorialStates(uni.hmm.list) |
67 | 67 |
comb.states.table = table(combstates.per.bin) |
68 | 68 |
comb.states = as.numeric(names(sort(comb.states.table, decreasing=TRUE))) |
... | ... |
@@ -84,7 +84,7 @@ unis2pseudomulti <- function(uni.hmm.list) { |
84 | 84 |
stopTimedMessage(ptm) |
85 | 85 |
|
86 | 86 |
## Calculate transition matrix |
87 |
- ptm <- startTimedMessage("Estimating transition matrix...") |
|
87 |
+ ptm <- startTimedMessage("Estimating transition matrix ...") |
|
88 | 88 |
A.estimated = matrix(0, ncol=2^nummod, nrow=2^nummod) |
89 | 89 |
colnames(A.estimated) = 1:2^nummod-1 |
90 | 90 |
rownames(A.estimated) = 1:2^nummod-1 |
... | ... |
@@ -258,7 +258,7 @@ void univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* max |
258 | 258 |
// ===================================================================================================================================================== |
259 | 259 |
// This function takes parameters from R, creates a multivariate HMM object, creates the distributions, runs the Baum-Welch and returns the result to R. |
260 | 260 |
// ===================================================================================================================================================== |
261 |
-void multivariate_hmm(int* O, int* T, int* N, int *Nmod, int* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity) |
|
261 |
+void multivariate_hmm(int* O, int* T, int* N, int *Nmod, double* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity) |
|
262 | 262 |
{ |
263 | 263 |
|
264 | 264 |
// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"} |
... | ... |
@@ -334,14 +334,15 @@ void multivariate_hmm(int* O, int* T, int* N, int *Nmod, int* comb_states, doubl |
334 | 334 |
|
335 | 335 |
// Prepare the binary_states (univariate) vector: binary_states[N][Nmod], e.g., binary_states[iN][imod] tells me at state comb_states[iN], modification imod is non-enriched (0) or enriched (1) |
336 | 336 |
//FILE_LOG(logDEBUG1) << "Preparing the binary_states vector"; |
337 |
+ double res; |
|
337 | 338 |
bool **binary_states = CallocBoolMatrix(*N, *Nmod); |
338 |
- for(int iN=0; iN < *N; iN++) //for each comb state considered |
|
339 |
+ for (int iN=0; iN < *N; iN++) //for each comb state considered |
|
339 | 340 |
{ |
340 |
- for(int imod=0; imod < *Nmod; imod++) //for each modification of this comb state |
|
341 |
+ res = comb_states[iN]; |
|
342 |
+ for (int imod=(*Nmod-1); imod >= 0; imod--) //for each modification of this comb state |
|
341 | 343 |
{ |
342 |
- binary_states[iN][imod] = comb_states[iN]&(int)pow(2,*Nmod-imod-1);//if =0->hidden state comb_states[iN] has modification imod non enriched; if !=0->enriched |
|
343 |
- if (binary_states[iN][imod] != 0 ) |
|
344 |
- binary_states[iN][imod] = 1; |
|
344 |
+ binary_states[iN][imod] = (bool)fmod(res,2); |
|
345 |
+ res = (res - (double)binary_states[iN][imod]) / 2.0; |
|
345 | 346 |
} |
346 | 347 |
} |
347 | 348 |
|
... | ... |
@@ -20,7 +20,7 @@ extern "C" |
20 | 20 |
void univariate_hmm(int* O, int* T, int* N, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* posteriors, double* densities, bool* keep_densities, double* A, double* proba, double* loglik, double* weights, int* iniproc, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* verbosity); |
21 | 21 |
|
22 | 22 |
extern "C" |
23 |
-void multivariate_hmm(int* O, int* T, int* N, int *Nmod, int* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity); |
|
23 |
+void multivariate_hmm(int* O, int* T, int* N, int *Nmod, double* comb_states, double* size, double* prob, double* w, double* cor_matrix_inv, double* det, int* maxiter, int* maxtime, double* eps, double* posteriors, bool* keep_posteriors, double* densities, bool* keep_densities, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* verbosity); |
|
24 | 24 |
|
25 | 25 |
extern "C" |
26 | 26 |
void univariate_cleanup(); |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
|
5 | 5 |
|
6 | 6 |
R_NativePrimitiveArgType arg1[] = {INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, LGLSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP}; |
7 |
-R_NativePrimitiveArgType arg2[] = {INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, LGLSXP, REALSXP, LGLSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP}; |
|
7 |
+R_NativePrimitiveArgType arg2[] = {INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, LGLSXP, REALSXP, LGLSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP}; |
|
8 | 8 |
R_NativePrimitiveArgType arg4[] = {INTSXP}; |
9 | 9 |
|
10 | 10 |
static const R_CMethodDef CEntries[] = { |