... | ... |
@@ -338,8 +338,8 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
338 | 338 |
} # loop over offsets |
339 | 339 |
|
340 | 340 |
### Find maximum posterior for each bin between offsets |
341 |
- ptm <- startTimedMessage("Finding maximum posterior between offsets ...") |
|
342 | 341 |
## Make bins with offset |
342 |
+ ptm <- startTimedMessage("Making bins with offsets ...") |
|
343 | 343 |
if (length(offsets) > 1) { |
344 | 344 |
stepbins <- suppressMessages( fixedWidthBins(chrom.lengths = seqlengths(bins), binsizes = as.numeric(offsets[2]))[[1]] ) |
345 | 345 |
} else { |
... | ... |
@@ -360,13 +360,37 @@ runMultivariate <- function(bins, info, comb.states, use.states, distributions, |
360 | 360 |
astates.step[ind@from, offset] <- states.list[[offset]][ind@to] |
361 | 361 |
} |
362 | 362 |
rm(aposteriors) |
363 |
+ stopTimedMessage(ptm) |
|
364 |
+ |
|
363 | 365 |
# Average and normalize counts to RPKM |
364 |
- counts.step <- apply(X = acounts.step, MARGIN = c(1,2), FUN = mean, na.rm=TRUE) |
|
365 |
- counts.step <- rpkm.matrix(counts.step, binsize = width(bins)[1]) |
|
366 |
- rm(acounts.step) |
|
366 |
+ ptm <- startTimedMessage("Averaging counts between offsets ...") |
|
367 |
+ # Start stuff to call C code |
|
368 |
+ dim_acounts.step <- dim(acounts.step) |
|
369 |
+ dimnames_acounts.step <- dimnames(acounts.step) |
|
370 |
+ dim(acounts.step) <- NULL |
|
371 |
+ z <- .C("C_array3D_mean", |
|
372 |
+ array3D = acounts.step, |
|
373 |
+ dim = as.integer(dim_acounts.step), |
|
374 |
+ mean = double(dim_acounts.step[1]*dim_acounts.step[2])) |
|
375 |
+ # dim(acounts.step) <- dim_acounts.step |
|
376 |
+ rm(acounts.step) |
|
377 |
+ dim(z$mean) <- dim_acounts.step[1:2] |
|
378 |
+ counts.step <- z$mean |
|
379 |
+ counts.step <- rpkm.matrix(counts.step, binsize = width(bins)[1]) |
|
380 |
+ # End stuff to call C code |
|
381 |
+ stopTimedMessage(ptm) |
|
367 | 382 |
|
368 | 383 |
## Find offset that maximizes the posteriors for each bin |
369 |
- ind <- apply(aposteriors.step, 1, which.max) |
|
384 |
+ ptm <- startTimedMessage("Finding maximum posterior between offsets ...") |
|
385 |
+ dim_aposteriors.step <- dim(aposteriors.step) |
|
386 |
+ dimnames_aposteriors.step <- dimnames(aposteriors.step) |
|
387 |
+ dim(aposteriors.step) <- NULL |
|
388 |
+ z <- .C("C_array3D_which_max", |
|
389 |
+ array3D = aposteriors.step, |
|
390 |
+ dim = as.integer(dim_aposteriors.step), |
|
391 |
+ ind_max = integer(dim_aposteriors.step[1])) |
|
392 |
+ ind <- z$ind_max |
|
393 |
+ dim(aposteriors.step) <- dim_aposteriors.step |
|
370 | 394 |
numstates <- length(comb.states) |
371 | 395 |
ind <- ceiling(ind / numstates) |
372 | 396 |
posteriors.step <- array(0, dim = c(length(stepbins), numstates), dimnames = list(bin=NULL, state=comb.states)) |
... | ... |
@@ -346,7 +346,7 @@ callPeaksUnivariateAllChr <- function(binned.data, input.data=NULL, eps=0.01, in |
346 | 346 |
# Rerun the HMM with different epsilon and initial parameters from trial run |
347 | 347 |
if (verbosity>=1) message("------------------------- Rerunning try ",indexmax," with eps = ",eps," -------------------------") |
348 | 348 |
hmm <- .C("C_univariate_hmm", |
349 |
- counts = as.integer(counts), # double* O |
|
349 |
+ counts = as.integer(counts), # int* O |
|
350 | 350 |
num.bins = as.integer(numbins), # int* T |
351 | 351 |
num.states = as.integer(numstates), # int* N |
352 | 352 |
size = double(length=numstates), # double* size |
... | ... |
@@ -434,8 +434,8 @@ callPeaksUnivariateAllChr <- function(binned.data, input.data=NULL, eps=0.01, in |
434 | 434 |
} # loop over offsets |
435 | 435 |
|
436 | 436 |
### Find maximum posterior for each bin between offsets |
437 |
- ptm <- startTimedMessage("Finding maximum posterior between offsets ...") |
|
438 | 437 |
## Make bins with offset |
438 |
+ ptm <- startTimedMessage("Making bins with offsets ...") |
|
439 | 439 |
if (length(offsets) > 1) { |
440 | 440 |
stepbins <- suppressMessages( fixedWidthBins(chrom.lengths = seqlengths(binned.data), binsizes = as.numeric(offsets[2]))[[1]] ) |
441 | 441 |
} else { |
... | ... |
@@ -454,19 +454,45 @@ callPeaksUnivariateAllChr <- function(binned.data, input.data=NULL, eps=0.01, in |
454 | 454 |
acounts.step[ind@from, offset] <- counts.list[[offset]][ind@to] |
455 | 455 |
} |
456 | 456 |
rm(aposteriors) |
457 |
+ stopTimedMessage(ptm) |
|
458 |
+ |
|
457 | 459 |
# Average and normalize counts to RPKM |
458 |
- counts.step <- apply(X = acounts.step, MARGIN = 1, FUN = mean, na.rm=TRUE, drop=FALSE) |
|
459 |
- counts.step <- rpkm.vector(counts.step, binsize = binsize) |
|
460 |
- rm(acounts.step) |
|
460 |
+ ptm <- startTimedMessage("Averaging counts between offsets ...") |
|
461 |
+ # Start stuff to call C code |
|
462 |
+ dim_acounts.step <- dim(acounts.step) |
|
463 |
+ dimnames_acounts.step <- dimnames(acounts.step) |
|
464 |
+ dim(acounts.step) <- NULL |
|
465 |
+ z <- .C("C_array2D_mean", |
|
466 |
+ array2D = acounts.step, |
|
467 |
+ dim = as.integer(dim_acounts.step), |
|
468 |
+ mean = double(dim_acounts.step[1])) |
|
469 |
+ # dim(acounts.step) <- dim_acounts.step |
|
470 |
+ rm(acounts.step) |
|
471 |
+ counts.step <- z$mean |
|
472 |
+ counts.step <- rpkm.vector(counts.step, binsize = binsize) |
|
473 |
+ # End stuff to call C code |
|
474 |
+ stopTimedMessage(ptm) |
|
461 | 475 |
|
462 | 476 |
## Find offset that maximizes the posteriors for each bin |
463 |
- ind <- apply(aposteriors.step, 1, which.max) |
|
477 |
+ ptm <- startTimedMessage("Finding maximum posterior between offsets ...") |
|
478 |
+ # Start stuff to call C code |
|
479 |
+ dim_aposteriors.step <- dim(aposteriors.step) |
|
480 |
+ dimnames_aposteriors.step <- dimnames(aposteriors.step) |
|
481 |
+ dim(aposteriors.step) <- NULL |
|
482 |
+ z <- .C("C_array3D_which_max", |
|
483 |
+ array3D = aposteriors.step, |
|
484 |
+ dim = as.integer(dim_aposteriors.step), |
|
485 |
+ ind_max = integer(dim_aposteriors.step[1])) |
|
486 |
+ dim(aposteriors.step) <- dim_aposteriors.step |
|
487 |
+ ind <- z$ind_max |
|
488 |
+ # End stuff to call C code |
|
464 | 489 |
ind <- ceiling(ind / numstates) |
465 | 490 |
posteriors.step <- array(0, dim = c(length(stepbins), numstates), dimnames = list(bin=NULL, state=state.labels)) |
466 | 491 |
for (i1 in 1:length(offsets)) { |
467 | 492 |
mask <- ind == i1 |
468 | 493 |
posteriors.step[mask,] <- aposteriors.step[mask,,i1, drop=FALSE] |
469 | 494 |
} |
495 |
+ rm(aposteriors.step) |
|
470 | 496 |
stepbins$posteriors <- posteriors.step |
471 | 497 |
stopTimedMessage(ptm) |
472 | 498 |
|
... | ... |
@@ -36,11 +36,13 @@ dnbinom.variance <- function(size, prob) { |
36 | 36 |
|
37 | 37 |
rpkm.vector <- function(counts, binsize) { |
38 | 38 |
rpkm <- counts / sum(as.numeric(counts)) * 1e6 * 1000 / binsize |
39 |
+ rpkm[is.nan(rpkm)] <- 0 |
|
39 | 40 |
return(rpkm) |
40 | 41 |
} |
41 | 42 |
|
42 | 43 |
rpkm.matrix <- function(counts, binsize) { |
43 | 44 |
rpkm <- sweep(counts, MARGIN = 2, STATS = colSums(counts), FUN = '/') |
45 |
+ rpkm[is.nan(rpkm)] <- 0 |
|
44 | 46 |
rpkm <- rpkm * 1e6 * 1000 / binsize |
45 | 47 |
return(rpkm) |
46 | 48 |
} |
47 | 49 |
\ No newline at end of file |
... | ... |
@@ -483,3 +483,69 @@ void multivariate_cleanup(int* Nmod) |
483 | 483 |
FreeIntMatrix(multiO, *Nmod); |
484 | 484 |
} |
485 | 485 |
|
486 |
+ |
|
487 |
+// ======================================================= |
|
488 |
+// C version of apply(array3D, 1, which.max) |
|
489 |
+// ======================================================= |
|
490 |
+void array3D_which_max(double* array3D, int* dim, int* ind_max) |
|
491 |
+{ |
|
492 |
+ // array3D is actually a vector, but is intended to originate from a 3D array in R |
|
493 |
+ std::vector<double> value_per_i0(dim[1] * dim[2]); |
|
494 |
+ for (int i0=0; i0<dim[0]; i0++) |
|
495 |
+ { |
|
496 |
+ for (int i1=0; i1<dim[1]; i1++) |
|
497 |
+ { |
|
498 |
+ for (int i2=0; i2<dim[2]; i2++) |
|
499 |
+ { |
|
500 |
+ value_per_i0[i1*dim[2]+i2] = array3D[(i1*dim[2]+i2) * dim[0] + i0]; |
|
501 |
+ // Rprintf("i0=%d, i1=%d, i2=%d, value_per_i0[%d] = %g\n", i0, i1, i2, i1*dim[2]+i2, value_per_i0[i1*dim[2]+i2]); |
|
502 |
+ } |
|
503 |
+ } |
|
504 |
+ ind_max[i0] = 1 + std::distance(value_per_i0.begin(), std::max_element(value_per_i0.begin(), value_per_i0.end())); |
|
505 |
+ } |
|
506 |
+ |
|
507 |
+} |
|
508 |
+ |
|
509 |
+ |
|
510 |
+// ==================================================================================== |
|
511 |
+// C version of apply(array2D, MARGIN = 1, FUN = mean) |
|
512 |
+// ==================================================================================== |
|
513 |
+void array2D_mean(double* array2D, int* dim, double* mean) |
|
514 |
+{ |
|
515 |
+ // array2D is actually a vector, but is intended to originate from a matrix in R |
|
516 |
+ double sum=0; |
|
517 |
+ for (int i0=0; i0<dim[0]; i0++) |
|
518 |
+ { |
|
519 |
+ sum = 0; |
|
520 |
+ for (int i1=0; i1<dim[1]; i1++) |
|
521 |
+ { |
|
522 |
+ sum += array2D[i1*dim[0] + i0]; |
|
523 |
+ } |
|
524 |
+ mean[i0] = sum / dim[1]; |
|
525 |
+ } |
|
526 |
+ |
|
527 |
+} |
|
528 |
+ |
|
529 |
+ |
|
530 |
+// ==================================================================================== |
|
531 |
+// C version of apply(array3D, MARGIN = c(1,2), FUN = mean) |
|
532 |
+// ==================================================================================== |
|
533 |
+void array3D_mean(double* array3D, int* dim, double* mean) |
|
534 |
+{ |
|
535 |
+ // array3D is actually a vector, but is intended to originate from a 3D array in R |
|
536 |
+ double sum=0; |
|
537 |
+ for (int i0=0; i0<dim[0]; i0++) |
|
538 |
+ { |
|
539 |
+ for (int i1=0; i1<dim[1]; i1++) |
|
540 |
+ { |
|
541 |
+ sum = 0; |
|
542 |
+ for (int i2=0; i2<dim[2]; i2++) |
|
543 |
+ { |
|
544 |
+ sum += array3D[(i2*dim[1] + i1)*dim[0] + i0]; |
|
545 |
+ // Rprintf("i0=%d, i1=%d, i2=%d, array3D[(i2*dim[1] + i1)*dim[0] + i0 = %d] = %g\n", i0, i1, i2, (i2*dim[1] + i1)*dim[0] + i0, array3D[(i2*dim[1] + i1)*dim[0] + i0]); |
|
546 |
+ } |
|
547 |
+ mean[i1*dim[0] + i0] = sum / dim[2]; |
|
548 |
+ } |
|
549 |
+ } |
|
550 |
+ |
|
551 |
+} |
|
486 | 552 |
\ No newline at end of file |
... | ... |
@@ -28,3 +28,11 @@ void univariate_cleanup(); |
28 | 28 |
extern "C" |
29 | 29 |
void multivariate_cleanup(int* Nmod); |
30 | 30 |
|
31 |
+extern "C" |
|
32 |
+void array3D_which_max(double* array3D, int* dim, int* ind_max); |
|
33 |
+ |
|
34 |
+extern "C" |
|
35 |
+void array2D_mean(double* array2D, int* dim, double* mean); |
|
36 |
+ |
|
37 |
+extern "C" |
|
38 |
+void array3D_mean(double* array3D, int* dim, double* mean); |
|
31 | 39 |
\ No newline at end of file |
... | ... |
@@ -6,12 +6,18 @@ |
6 | 6 |
R_NativePrimitiveArgType arg1[] = {INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, LGLSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP}; |
7 | 7 |
R_NativePrimitiveArgType arg2[] = {INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, LGLSXP, REALSXP, LGLSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP}; |
8 | 8 |
R_NativePrimitiveArgType arg4[] = {INTSXP}; |
9 |
+R_NativePrimitiveArgType arg5[] = {REALSXP, INTSXP, INTSXP}; |
|
10 |
+R_NativePrimitiveArgType arg6[] = {REALSXP, INTSXP, REALSXP}; |
|
11 |
+R_NativePrimitiveArgType arg7[] = {REALSXP, INTSXP, REALSXP}; |
|
9 | 12 |
|
10 | 13 |
static const R_CMethodDef CEntries[] = { |
11 | 14 |
{"C_univariate_hmm", (DL_FUNC) &univariate_hmm, 25, arg1}, |
12 | 15 |
{"C_multivariate_hmm", (DL_FUNC) &multivariate_hmm, 27, arg2}, |
13 | 16 |
{"C_univariate_cleanup", (DL_FUNC) &univariate_cleanup, 0, NULL}, |
14 | 17 |
{"C_multivariate_cleanup", (DL_FUNC) &multivariate_cleanup, 1, arg4}, |
18 |
+ {"C_array3D_which_max", (DL_FUNC) &array3D_which_max, 3, arg5}, |
|
19 |
+ {"C_array2D_mean", (DL_FUNC) &array2D_mean, 3, arg6}, |
|
20 |
+ {"C_array3D_mean", (DL_FUNC) &array3D_mean, 3, arg7}, |
|
15 | 21 |
{NULL, NULL, 0, NULL} |
16 | 22 |
}; |
17 | 23 |
|