... | ... |
@@ -2,10 +2,16 @@ CHANGES IN VERSION 1.5.1 |
2 | 2 |
------------------------ |
3 | 3 |
|
4 | 4 |
SIGNIFICANT USER-LEVEL CHANGES |
5 |
+ |
|
6 |
+ o GC correction is now done with a loess-fit by default instead of the quadratic fit. This should improve accuracy. |
|
7 |
+ |
|
8 |
+ o A stepsize for a sliding window can be selected in addition to the binsize. This improves resolution of detected breakpoints. |
|
9 |
+ |
|
10 |
+ o Breakpoints are reported with confidence intervals in folder BROWSERFILES. |
|
5 | 11 |
|
6 | 12 |
BUG FIXES |
7 | 13 |
|
8 |
- o Can use BSgenome*NCBI* for GC correction now (worked only for BSgenome*UCSC* before). |
|
14 |
+ o Can use BSgenome*NCBI* for GC correction (worked only for BSgenome*UCSC* before). |
|
9 | 15 |
|
10 | 16 |
|
11 | 17 |
CHANGES IN VERSION 1.3.4 |
... | ... |
@@ -150,30 +150,3 @@ mergeStrandseqFiles <- function(files, assembly, chromosomes=NULL, pairedEndRead |
150 | 150 |
return(reads) |
151 | 151 |
|
152 | 152 |
} |
153 |
- |
|
154 |
- |
|
155 |
-createBlacklistFromReads <- function(reads, binsize=1e5, quantile.cutoff=0.99) { |
|
156 |
- |
|
157 |
- bins <- binReads(reads, binsizes = binsize)[[1]] |
|
158 |
- # bins.split <- split(bins, bins@seqnames) |
|
159 |
- # blacklist.list <- GRangesList() |
|
160 |
- # for (i1 in 1:length(bins.split)) { |
|
161 |
- # cutoff <- quantile(bins.split[[i1]]$mcounts, quantile.cutoff) |
|
162 |
- # blacklist.list[[i1]] <- bins.split[[i1]][bins.split[[i1]]$mcounts > cutoff] |
|
163 |
- # } |
|
164 |
- # blacklist <- unlist(blacklist.list, use.names=FALSE) |
|
165 |
- cutoff <- quantile(bins$mcounts, quantile.cutoff) |
|
166 |
- blacklist <- bins[bins$mcounts >= cutoff] |
|
167 |
- # print(as.data.frame(blacklist)) |
|
168 |
- blacklist <- reduce(blacklist) |
|
169 |
- # print(as.data.frame(blacklist)) |
|
170 |
- frac <- sum(as.numeric(width(blacklist))) / sum(as.numeric(seqlengths(blacklist))) |
|
171 |
- message("Fraction of genome in blacklist: ", round(frac,4)) |
|
172 |
- |
|
173 |
- ggplt <- plotProfile(bins, both.strands=TRUE) |
|
174 |
- ggplt <- ggplt + geom_hline(data=data.frame(y=-cutoff), mapping=aes_string(yintercept='y'), col='red') |
|
175 |
- print(ggplt) |
|
176 |
- |
|
177 |
- return(blacklist) |
|
178 |
- |
|
179 |
-} |
... | ... |
@@ -528,7 +528,7 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", |
528 | 528 |
result$bins$state <- state.labels[states.step] |
529 | 529 |
result$bins$copy.number <- multiplicity[as.character(result$bins$state)] |
530 | 530 |
## Counts |
531 |
- result$binned.data <- binned.data.list |
|
531 |
+ result$bincounts <- binned.data.list |
|
532 | 532 |
## Segmentation |
533 | 533 |
message("Making segmentation ...", appendLF=FALSE) |
534 | 534 |
ptm <- proc.time() |
... | ... |
@@ -563,10 +563,16 @@ univariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", |
563 | 563 |
bivariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1, max.iter=-1, num.trials=1, eps.try=NULL, num.threads=1, count.cutoff.quantile=0.999, states=c("zero-inflation",paste0(0:10,"-somy")), most.frequent.state="1-somy", algorithm='EM', initial.params=NULL, verbosity=1) { |
564 | 564 |
|
565 | 565 |
## Intercept user input |
566 |
- binned.data <- loadFromFiles(binned.data, check.class='GRanges')[[1]] |
|
566 |
+ binned.data <- loadFromFiles(binned.data, check.class=c('GRanges','GRangesList'))[[1]] |
|
567 | 567 |
if (is.null(ID)) { |
568 | 568 |
ID <- attr(binned.data, 'ID') |
569 | 569 |
} |
570 |
+ if (class(binned.data) == 'GRangesList') { |
|
571 |
+ binned.data.list <- binned.data |
|
572 |
+ binned.data <- binned.data.list[[1]] |
|
573 |
+ } else if (class(binned.data) == 'GRanges') { |
|
574 |
+ binned.data.list <- GRangesList('0'=binned.data) |
|
575 |
+ } |
|
570 | 576 |
if (check.positive(eps)!=0) stop("argument 'eps' expects a positive numeric") |
571 | 577 |
if (check.integer(max.time)!=0) stop("argument 'max.time' expects an integer") |
572 | 578 |
if (check.integer(max.iter)!=0) stop("argument 'max.iter' expects an integer") |
... | ... |
@@ -585,392 +591,471 @@ bivariate.findCNVs <- function(binned.data, ID=NULL, eps=0.1, init="standard", m |
585 | 591 |
if (!is.null(initial.params)) { |
586 | 592 |
init <- 'initial.params' |
587 | 593 |
} |
588 |
- |
|
589 |
- warlist <- list() |
|
590 |
- if (is.null(eps.try)) eps.try <- eps |
|
591 |
- |
|
594 |
+ |
|
592 | 595 |
## Variables |
593 |
- num.bins <- length(binned.data) |
|
594 | 596 |
algorithm <- factor(algorithm, levels=c('baumWelch','viterbi','EM')) |
595 |
- |
|
596 |
- ## Get counts |
|
597 |
- select <- 'counts' |
|
598 |
- counts <- matrix(c(mcols(binned.data)[,paste0('m',select)], mcols(binned.data)[,paste0('p',select)]), ncol=2, dimnames=list(bin=1:num.bins, strand=c('minus','plus'))) |
|
599 |
- maxcounts <- max(counts) |
|
600 |
- |
|
601 |
- ## Filter high counts out, makes HMM faster |
|
602 |
- count.cutoff <- quantile(counts, count.cutoff.quantile) |
|
603 |
- names.count.cutoff <- names(count.cutoff) |
|
604 |
- count.cutoff <- ceiling(count.cutoff) |
|
605 |
- mask <- counts > count.cutoff |
|
606 |
- counts[mask] <- count.cutoff |
|
607 |
- numfiltered <- length(which(mask)) |
|
608 |
- if (numfiltered > 0) { |
|
609 |
- message(paste0("Replaced read counts > ",count.cutoff," (",names.count.cutoff," quantile) by ",count.cutoff," in ",numfiltered," bins. Set option 'count.cutoff.quantile=1' to disable this filtering. This filtering was done to enhance performance.")) |
|
610 |
- } |
|
611 | 597 |
|
612 | 598 |
### Make return object |
613 |
- result <- list() |
|
614 |
- class(result) <- class.bivariate.hmm |
|
615 |
- result$ID <- ID |
|
616 |
- result$bins <- binned.data |
|
599 |
+ result <- list() |
|
600 |
+ class(result) <- class.bivariate.hmm |
|
601 |
+ result$ID <- ID |
|
602 |
+ result$bins <- binned.data |
|
617 | 603 |
## Quality info |
618 |
- result$qualityInfo <- as.list(getQC(binned.data)) |
|
604 |
+ result$qualityInfo <- as.list(getQC(binned.data)) |
|
619 | 605 |
|
620 |
- # Check if there are counts in the data, otherwise HMM will blow up |
|
621 |
- if (!any(counts!=0)) { |
|
622 |
- warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": All counts in data are zero. No HMM done.")) |
|
623 |
- result$warnings <- warlist |
|
624 |
- return(result) |
|
625 |
- } else if (any(counts<0)) { |
|
626 |
- warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": Some counts in data are negative. No HMM done.")) |
|
627 |
- result$warnings <- warlist |
|
628 |
- return(result) |
|
629 |
- } |
|
606 |
+ warlist <- list() |
|
607 |
+ if (is.null(eps.try)) eps.try <- eps |
|
630 | 608 |
|
631 |
- if (init=='initial.params') { |
|
632 |
- uni.transitionProbs <- initial.params$univariateParams$transitionProbs |
|
633 |
- uni.startProbs <- initial.params$univariateParams$startProbs |
|
634 |
- distributions <- initial.params$distributions |
|
635 |
- uni.weights <- initial.params$univariateParams$weights |
|
636 |
- uni.states <- names(uni.weights) |
|
637 |
- num.uni.states <- length(uni.states) |
|
638 |
- num.models <- length(distributions) |
|
639 |
- num.bins <- length(initial.params$bins) |
|
640 |
- comb.states <- factor(names(initial.params$startProbs), levels=names(initial.params$startProbs)) |
|
641 |
- num.comb.states <- length(comb.states) |
|
642 |
- states.list <- list(minus=initial.params$bins$mstate, plus=initial.params$bins$pstate) |
|
643 |
- comb.states.per.bin <- factor(do.call(paste, states.list), levels=levels(comb.states)) |
|
644 |
- A.initial <- initial.params$transitionProbs |
|
645 |
- proba.initial <- initial.params$startProbs |
|
646 |
- use.initial <- TRUE |
|
647 |
- } else { |
|
648 |
- ### Stack the strands and run one univariate findCNVs |
|
649 |
- message("") |
|
650 |
- message(paste(rep('-',getOption('width')), collapse='')) |
|
651 |
- binned.data.minus <- binned.data |
|
652 |
- strand(binned.data.minus) <- '-' |
|
653 |
- binned.data.minus$counts <- counts[,'minus', drop=FALSE] |
|
654 |
- binned.data.plus <- binned.data |
|
655 |
- strand(binned.data.plus) <- '+' |
|
656 |
- binned.data.plus$counts <- counts[,'plus', drop=FALSE] |
|
657 |
- binned.data.stacked <- c(binned.data.minus, binned.data.plus) |
|
658 |
- mask.attributes <- c('qualityInfo', 'ID', 'min.mapq') |
|
659 |
- attributes(binned.data.stacked)[mask.attributes] <- attributes(binned.data)[mask.attributes] |
|
609 |
+ ### Arrays for finding maximum posterior for each bin between offsets |
|
610 |
+ ## Make bins with offset |
|
611 |
+ ptm <- startTimedMessage("Making bins with offsets ...") |
|
612 |
+ if (length(binned.data.list) > 1) { |
|
613 |
+ stepbins <- disjoin(unlist(binned.data.list, use.names=FALSE)) |
|
614 |
+ } else { |
|
615 |
+ stepbins <- binned.data.list[[1]] |
|
616 |
+ mcols(stepbins) <- NULL |
|
617 |
+ } |
|
618 |
+ amaxPosterior.step <- array(0, dim = c(length(stepbins), 2), dimnames = list(bin=NULL, offset=c('previousOffsets', 'currentOffset'))) # to store maximum posterior for current and max-of-previous offsets |
|
619 |
+ astates.step <- array(0, dim = c(length(stepbins), 2), dimnames = list(bin=NULL, offset=c('previousOffsets', 'currentOffset'))) # to store states for current and max-of-previous offsets |
|
620 |
+ stopTimedMessage(ptm) |
|
621 |
+ |
|
622 |
+ ### Loop over offsets ### |
|
623 |
+ for (istep in 1:length(binned.data.list)) { |
|
624 |
+ binned.data <- binned.data.list[[istep]] |
|
625 |
+ num.bins <- length(binned.data) |
|
626 |
+ if (istep > 1) { |
|
627 |
+ ptm.offset <- startTimedMessage("Obtaining states for step = ", istep, " ...") |
|
628 |
+ ## Run only one iteration (no updating) if we are already over istep==1 |
|
629 |
+ initial.params <- result |
|
630 |
+ init <- 'initial.params' |
|
631 |
+ algorithm <- factor('baumWelch', levels=c('baumWelch','viterbi','EM')) |
|
632 |
+ num.trials <- 1 |
|
633 |
+ verbosity <- 0 |
|
634 |
+ } |
|
635 |
+ |
|
636 |
+ ## Get counts |
|
637 |
+ select <- 'counts' |
|
638 |
+ counts <- matrix(c(mcols(binned.data)[,paste0('m',select)], mcols(binned.data)[,paste0('p',select)]), ncol=2, dimnames=list(bin=1:num.bins, strand=c('minus','plus'))) |
|
639 |
+ maxcounts <- max(counts) |
|
640 |
+ |
|
641 |
+ ## Filter high counts out, makes HMM faster |
|
642 |
+ count.cutoff <- quantile(counts, count.cutoff.quantile) |
|
643 |
+ names.count.cutoff <- names(count.cutoff) |
|
644 |
+ count.cutoff <- ceiling(count.cutoff) |
|
645 |
+ mask <- counts > count.cutoff |
|
646 |
+ counts[mask] <- count.cutoff |
|
647 |
+ numfiltered <- length(which(mask)) |
|
648 |
+ if (numfiltered > 0 & istep == 1) { |
|
649 |
+ message(paste0("Replaced read counts > ",count.cutoff," (",names.count.cutoff," quantile) by ",count.cutoff," in ",numfiltered," bins. Set option 'count.cutoff.quantile=1' to disable this filtering. This filtering was done to enhance performance.")) |
|
650 |
+ } |
|
651 |
+ |
|
652 |
+ # Check if there are counts in the data, otherwise HMM will blow up |
|
653 |
+ if (!any(counts!=0)) { |
|
654 |
+ warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": All counts in data are zero. No HMM done.")) |
|
655 |
+ result$warnings <- warlist |
|
656 |
+ return(result) |
|
657 |
+ } else if (any(counts<0)) { |
|
658 |
+ warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": Some counts in data are negative. No HMM done.")) |
|
659 |
+ result$warnings <- warlist |
|
660 |
+ return(result) |
|
661 |
+ } |
|
660 | 662 |
|
661 |
- # if (method == 'HMM') { |
|
663 |
+ if (init=='initial.params') { |
|
664 |
+ uni.transitionProbs <- initial.params$univariateParams$transitionProbs |
|
665 |
+ uni.startProbs <- initial.params$univariateParams$startProbs |
|
666 |
+ distributions <- initial.params$distributions |
|
667 |
+ uni.weights <- initial.params$univariateParams$weights |
|
668 |
+ uni.states <- names(uni.weights) |
|
669 |
+ num.uni.states <- length(uni.states) |
|
670 |
+ num.models <- length(distributions) |
|
671 |
+ comb.states <- factor(names(initial.params$startProbs), levels=names(initial.params$startProbs)) |
|
672 |
+ num.comb.states <- length(comb.states) |
|
673 |
+ states.list <- list(minus=initial.params$bins$mstate, plus=initial.params$bins$pstate) |
|
674 |
+ comb.states.per.bin <- factor(do.call(paste, states.list), levels=levels(comb.states)) |
|
675 |
+ A.initial <- initial.params$transitionProbs |
|
676 |
+ proba.initial <- initial.params$startProbs |
|
677 |
+ use.initial <- TRUE |
|
678 |
+ } else { |
|
679 |
+ ### Stack the strands and run one univariate findCNVs |
|
680 |
+ message("") |
|
681 |
+ message(paste(rep('-',getOption('width')), collapse='')) |
|
682 |
+ binned.data.minus <- binned.data |
|
683 |
+ strand(binned.data.minus) <- '-' |
|
684 |
+ binned.data.minus$counts <- counts[,'minus', drop=FALSE] |
|
685 |
+ binned.data.plus <- binned.data |
|
686 |
+ strand(binned.data.plus) <- '+' |
|
687 |
+ binned.data.plus$counts <- counts[,'plus', drop=FALSE] |
|
688 |
+ binned.data.stacked <- c(binned.data.minus, binned.data.plus) |
|
689 |
+ mask.attributes <- c('qualityInfo', 'ID', 'min.mapq') |
|
690 |
+ attributes(binned.data.stacked)[mask.attributes] <- attributes(binned.data)[mask.attributes] |
|
691 |
+ |
|
662 | 692 |
message("Running univariate HMM") |
663 | 693 |
model.stacked <- univariate.findCNVs(binned.data.stacked, ID, eps=eps, init=init, max.time=max.time, max.iter=max.iter, num.trials=num.trials, eps.try=eps.try, num.threads=num.threads, count.cutoff.quantile=1, states=states, most.frequent.state=most.frequent.state) |
664 | 694 |
if (is.na(model.stacked$convergenceInfo$error)) { |
665 | 695 |
result$warnings <- model.stacked$warnings |
666 | 696 |
return(result) |
667 | 697 |
} |
668 |
-# } else if (method == 'dnacopy') { |
|
669 |
-# message("Running DNAcopy") |
|
670 |
-# model.stacked <- DNAcopy.findCNVs(binned.data.stacked, ID, CNgrid.start=0.5, count.cutoff.quantile=1) |
|
671 |
-# } |
|
672 |
- model.minus <- model.stacked |
|
673 |
- model.minus$bins <- model.minus$bins[strand(model.minus$bins)=='-'] |
|
674 |
- model.minus$segments <- model.minus$segments[strand(model.minus$segments)=='-'] |
|
675 |
- model.plus <- model.stacked |
|
676 |
- model.plus$bins <- model.plus$bins[strand(model.plus$bins)=='+'] |
|
677 |
- model.plus$segments <- model.plus$segments[strand(model.plus$segments)=='+'] |
|
678 |
- |
|
679 |
- models <- list(minus=model.minus, plus=model.plus) |
|
680 |
- |
|
681 |
- ## Extract counts and other stuff |
|
682 |
- uni.transitionProbs <- lapply(models, '[[', 'transitionProbs')[[1]] |
|
683 |
- uni.startProbs <- lapply(models, '[[', 'startProbs')[[1]] |
|
684 |
- distributions <- lapply(models, '[[', 'distributions') |
|
685 |
- uni.weights <- lapply(models, '[[', 'weights')[[1]] |
|
686 |
- uni.states <- names(uni.weights) |
|
687 |
- num.uni.states <- length(uni.states) |
|
688 |
- num.models <- length(distributions) |
|
689 |
- num.bins <- length(models[[1]]$bins) |
|
690 |
- comb.states <- vector() |
|
691 |
- levels.state <- levels(models[[1]]$bins$state) |
|
692 |
- for (i1 in 1:length(levels.state)) { |
|
693 |
- for (i2 in 1:length(levels.state)) { |
|
694 |
- comb.state <- paste(levels.state[i1], levels.state[i2]) |
|
695 |
- comb.states[length(comb.states)+1] <- comb.state |
|
696 |
- } |
|
697 |
- } |
|
698 |
- comb.states <- factor(comb.states, levels=comb.states) |
|
699 |
- num.comb.states <- length(comb.states) |
|
700 |
- states.list <- lapply(models, function(x) { x$bins$state }) |
|
701 |
- comb.states.per.bin <- factor(do.call(paste, states.list), levels=levels(comb.states)) |
|
702 |
- A.initial <- double(length=num.comb.states*num.comb.states) |
|
703 |
- proba.initial <- double(length=num.comb.states) |
|
704 |
- use.initial <- FALSE |
|
705 |
- } |
|
706 |
- |
|
707 |
- ### Prepare the multivariate HMM |
|
708 |
- message("") |
|
709 |
- message(paste(rep('-',getOption('width')), collapse='')) |
|
710 |
- message("Preparing bivariate HMM\n") |
|
711 |
- |
|
712 |
- ## Pre-compute z-values for each number of counts |
|
713 |
- ptm <- startTimedMessage("Computing pre z-matrix...") |
|
714 |
- z.per.count <- array(NA, dim=c(maxcounts+1, num.models, num.uni.states), dimnames=list(counts=0:maxcounts, strand=names(distributions), state=uni.states)) |
|
715 |
- xcounts <- 0:maxcounts |
|
716 |
- for (istrand in 1:num.models) { |
|
717 |
- for (istate in 1:num.uni.states) { |
|
718 |
- if (distributions[[istrand]][istate,'type']=='dnbinom') { |
|
719 |
- size <- distributions[[istrand]][istate,'size'] |
|
720 |
- prob <- distributions[[istrand]][istate,'prob'] |
|
721 |
- u <- stats::pnbinom(xcounts, size, prob) |
|
722 |
- } else if (distributions[[istrand]][istate,'type']=='dbinom') { |
|
723 |
- size <- distributions[[istrand]][istate,'size'] |
|
724 |
- prob <- distributions[[istrand]][istate,'prob'] |
|
725 |
- u <- stats::pbinom(xcounts, size, prob) |
|
726 |
- } else if (distributions[[istrand]][istate,'type']=='delta') { |
|
727 |
- u <- rep(1, length(xcounts)) |
|
728 |
- } else if (distributions[[istrand]][istate,'type']=='dgeom') { |
|
729 |
- prob <- distributions[[istrand]][istate,'prob'] |
|
730 |
- u <- stats::pgeom(xcounts, prob) |
|
731 |
- } |
|
732 |
- qnorm_u <- stats::qnorm(u) |
|
733 |
- mask <- qnorm_u==Inf |
|
734 |
- qnorm_u[mask] <- stats::qnorm(1-1e-16) |
|
735 |
- z.per.count[ , istrand, istate] <- qnorm_u |
|
736 |
- } |
|
737 |
- } |
|
738 |
- stopTimedMessage(ptm) |
|
739 |
- |
|
740 |
- ## Compute the z matrix |
|
741 |
- ptm <- startTimedMessage("Transfering values into z-matrix...") |
|
742 |
- z.per.bin = array(NA, dim=c(num.bins, num.models, num.uni.states), dimnames=list(bin=1:num.bins, strand=names(distributions), state=uni.states)) |
|
743 |
- for (istrand in 1:num.models) { |
|
744 |
- for (istate in 1:num.uni.states) { |
|
745 |
- z.per.bin[ , istrand, istate] = z.per.count[counts[,istrand]+1, istrand, istate] |
|
698 |
+ model.minus <- model.stacked |
|
699 |
+ model.minus$bins <- model.minus$bins[strand(model.minus$bins)=='-'] |
|
700 |
+ model.minus$segments <- model.minus$segments[strand(model.minus$segments)=='-'] |
|
701 |
+ model.plus <- model.stacked |
|
702 |
+ model.plus$bins <- model.plus$bins[strand(model.plus$bins)=='+'] |
|
703 |
+ model.plus$segments <- model.plus$segments[strand(model.plus$segments)=='+'] |
|
704 |
+ |
|
705 |
+ models <- list(minus=model.minus, plus=model.plus) |
|
706 |
+ |
|
707 |
+ ## Extract counts and other stuff |
|
708 |
+ uni.transitionProbs <- lapply(models, '[[', 'transitionProbs')[[1]] |
|
709 |
+ uni.startProbs <- lapply(models, '[[', 'startProbs')[[1]] |
|
710 |
+ distributions <- lapply(models, '[[', 'distributions') |
|
711 |
+ uni.weights <- lapply(models, '[[', 'weights')[[1]] |
|
712 |
+ uni.states <- names(uni.weights) |
|
713 |
+ num.uni.states <- length(uni.states) |
|
714 |
+ num.models <- length(distributions) |
|
715 |
+ num.bins <- length(models[[1]]$bins) |
|
716 |
+ comb.states <- vector() |
|
717 |
+ levels.state <- levels(models[[1]]$bins$state) |
|
718 |
+ for (i1 in 1:length(levels.state)) { |
|
719 |
+ for (i2 in 1:length(levels.state)) { |
|
720 |
+ comb.state <- paste(levels.state[i1], levels.state[i2]) |
|
721 |
+ comb.states[length(comb.states)+1] <- comb.state |
|
722 |
+ } |
|
723 |
+ } |
|
724 |
+ comb.states <- factor(comb.states, levels=comb.states) |
|
725 |
+ num.comb.states <- length(comb.states) |
|
726 |
+ states.list <- lapply(models, function(x) { x$bins$state }) |
|
727 |
+ comb.states.per.bin <- factor(do.call(paste, states.list), levels=levels(comb.states)) |
|
728 |
+ A.initial <- double(length=num.comb.states*num.comb.states) |
|
729 |
+ proba.initial <- double(length=num.comb.states) |
|
730 |
+ use.initial <- FALSE |
|
731 |
+ } |
|
732 |
+ |
|
733 |
+ ### Prepare the multivariate HMM |
|
734 |
+ if (verbosity >= 1 & istep == 1) { |
|
735 |
+ message("") |
|
736 |
+ message(paste(rep('-',getOption('width')), collapse='')) |
|
737 |
+ message("Preparing bivariate HMM\n") |
|
738 |
+ } |
|
739 |
+ |
|
740 |
+ ## Pre-compute z-values for each number of counts |
|
741 |
+ if (verbosity >=1) { |
|
742 |
+ ptm <- startTimedMessage("Computing pre z-matrix...") |
|
746 | 743 |
} |
747 |
- } |
|
748 |
- remove(z.per.count) |
|
749 |
- stopTimedMessage(ptm) |
|
750 |
- |
|
751 |
- ## Calculate correlation matrix |
|
752 |
- ptm <- startTimedMessage("Computing inverse of correlation matrix...") |
|
753 |
- correlationMatrix <- array(0, dim=c(num.models,num.models,num.comb.states), dimnames=list(strand=names(distributions), strand=names(distributions), comb.state=comb.states)) |
|
754 |
- correlationMatrixInverse <- correlationMatrix |
|
755 |
- determinant <- rep(0, num.comb.states) |
|
756 |
- names(determinant) <- comb.states |
|
757 |
- usestateTF <- rep(NA, num.comb.states) # TRUE, FALSE vector for usable states |
|
758 |
- names(usestateTF) <- comb.states |
|
759 |
- for (comb.state in comb.states) { |
|
760 |
- state <- strsplit(comb.state, ' ')[[1]] |
|
761 |
- mask <- which(comb.states.per.bin==comb.state) |
|
762 |
- # Subselect z |
|
763 |
- z.temp <- matrix(NA, ncol=num.models, nrow=length(mask)) |
|
764 |
- for (istrand in 1:num.models) { |
|
765 |
- z.temp[,istrand] <- z.per.bin[mask, istrand, state[istrand]] |
|
744 |
+ z.per.count <- array(NA, dim=c(maxcounts+1, num.models, num.uni.states), dimnames=list(counts=0:maxcounts, strand=names(distributions), state=uni.states)) |
|
745 |
+ xcounts <- 0:maxcounts |
|
746 |
+ for (istrand in 1:num.models) { |
|
747 |
+ for (istate in 1:num.uni.states) { |
|
748 |
+ if (distributions[[istrand]][istate,'type']=='dnbinom') { |
|
749 |
+ size <- distributions[[istrand]][istate,'size'] |
|
750 |
+ prob <- distributions[[istrand]][istate,'prob'] |
|
751 |
+ u <- stats::pnbinom(xcounts, size, prob) |
|
752 |
+ } else if (distributions[[istrand]][istate,'type']=='dbinom') { |
|
753 |
+ size <- distributions[[istrand]][istate,'size'] |
|
754 |
+ prob <- distributions[[istrand]][istate,'prob'] |
|
755 |
+ u <- stats::pbinom(xcounts, size, prob) |
|
756 |
+ } else if (distributions[[istrand]][istate,'type']=='delta') { |
|
757 |
+ u <- rep(1, length(xcounts)) |
|
758 |
+ } else if (distributions[[istrand]][istate,'type']=='dgeom') { |
|
759 |
+ prob <- distributions[[istrand]][istate,'prob'] |
|
760 |
+ u <- stats::pgeom(xcounts, prob) |
|
761 |
+ } |
|
762 |
+ qnorm_u <- stats::qnorm(u) |
|
763 |
+ mask <- qnorm_u==Inf |
|
764 |
+ qnorm_u[mask] <- stats::qnorm(1-1e-16) |
|
765 |
+ z.per.count[ , istrand, istate] <- qnorm_u |
|
766 |
+ } |
|
767 |
+ } |
|
768 |
+ if (verbosity >=1) { |
|
769 |
+ stopTimedMessage(ptm) |
|
766 | 770 |
} |
767 |
- temp <- tryCatch({ |
|
768 |
- if (nrow(z.temp) > 1) { |
|
769 |
- correlationMatrix[,,comb.state] <- cor(z.temp) |
|
770 |
- determinant[comb.state] <- det( correlationMatrix[,,comb.state] ) |
|
771 |
- correlationMatrixInverse[,,comb.state] <- solve(correlationMatrix[,,comb.state]) |
|
772 |
- usestateTF[comb.state] <- TRUE |
|
773 |
- } else { |
|
774 |
- correlationMatrix[,,comb.state] <- diag(num.models) |
|
775 |
- determinant[comb.state] <- det( correlationMatrix[,,comb.state] ) |
|
776 |
- correlationMatrixInverse[,,comb.state] <- solve(correlationMatrix[,,comb.state]) |
|
777 |
- usestateTF[comb.state] <- TRUE |
|
778 |
- } |
|
779 |
- 0 |
|
780 |
- }, warning = function(war) { |
|
781 |
- 1 |
|
782 |
- }, error = function(err) { |
|
783 |
- 1 |
|
784 |
- }) |
|
785 |
- if (temp!=0) { |
|
786 |
- correlationMatrix[,,comb.state] <- diag(num.models) |
|
787 |
- determinant[comb.state] <- det( correlationMatrix[,,comb.state] ) |
|
788 |
- correlationMatrixInverse[,,comb.state] <- solve(correlationMatrix[,,comb.state]) |
|
789 |
- usestateTF[comb.state] <- TRUE |
|
771 |
+ |
|
772 |
+ ## Compute the z matrix |
|
773 |
+ if (verbosity >=1) { |
|
774 |
+ ptm <- startTimedMessage("Transfering values into z-matrix...") |
|
790 | 775 |
} |
791 |
- } |
|
792 |
- stopTimedMessage(ptm) |
|
793 |
- |
|
794 |
- # Use only states with valid correlationMatrixInverse (all states are valid in this version) |
|
795 |
- correlationMatrix = correlationMatrix[,,usestateTF] |
|
796 |
- correlationMatrixInverse = correlationMatrixInverse[,,usestateTF] |
|
797 |
- comb.states = comb.states[usestateTF] |
|
798 |
- comb.states <- droplevels(comb.states) |
|
799 |
- determinant = determinant[usestateTF] |
|
800 |
- num.comb.states <- length(comb.states) |
|
801 |
- |
|
802 |
- ### Calculate multivariate densities for each state ### |
|
803 |
- ptm <- startTimedMessage("Calculating multivariate densities...") |
|
804 |
- densities <- matrix(1, ncol=num.comb.states, nrow=num.bins, dimnames=list(bin=1:num.bins, comb.state=comb.states)) |
|
805 |
- for (comb.state in comb.states) { |
|
806 |
- istate <- which(comb.state==comb.states) |
|
807 |
- state <- strsplit(comb.state, ' ')[[1]] |
|
808 |
- z.temp <- matrix(NA, ncol=num.models, nrow=num.bins) |
|
809 |
- product <- 1 |
|
810 |
- for (istrand in 1:num.models) { |
|
811 |
- z.temp[,istrand] <- z.per.bin[, istrand, state[istrand]] |
|
812 |
- if (distributions[[istrand]][state[istrand],'type'] == 'dnbinom') { |
|
813 |
- size <- distributions[[istrand]][state[istrand],'size'] |
|
814 |
- prob <- distributions[[istrand]][state[istrand],'prob'] |
|
815 |
- product <- product * stats::dnbinom(counts[,istrand], size, prob) |
|
816 |
- } else if (distributions[[istrand]][state[istrand],'type'] == 'dgeom') { |
|
817 |
- prob <- distributions[[istrand]][state[istrand],'prob'] |
|
818 |
- product <- product * stats::dgeom(counts[,istrand], prob) |
|
819 |
- } else if (distributions[[istrand]][state[istrand],'type'] == 'delta') { |
|
820 |
- product <- product * ifelse(counts[,istrand]==0, 1, 0) |
|
821 |
- } |
|
776 |
+ z.per.bin = array(NA, dim=c(num.bins, num.models, num.uni.states), dimnames=list(bin=1:num.bins, strand=names(distributions), state=uni.states)) |
|
777 |
+ for (istrand in 1:num.models) { |
|
778 |
+ for (istate in 1:num.uni.states) { |
|
779 |
+ z.per.bin[ , istrand, istate] = z.per.count[counts[,istrand]+1, istrand, istate] |
|
780 |
+ } |
|
781 |
+ } |
|
782 |
+ remove(z.per.count) |
|
783 |
+ if (verbosity >=1) { |
|
784 |
+ stopTimedMessage(ptm) |
|
822 | 785 |
} |
823 |
- exponent <- -0.5 * apply( ( z.temp %*% (correlationMatrixInverse[ , , istate] - diag(num.models)) ) * z.temp, 1, sum) |
|
824 |
- exponent[is.nan(exponent)] <- 0 |
|
825 |
- densities[,istate] <- product * determinant[istate]^(-0.5) * exp( exponent ) |
|
826 |
- } |
|
827 |
- # Check if densities are > 1 |
|
828 |
-# if (any(densities>1)) stop("Densities > 1") |
|
829 |
-# if (any(densities<0)) stop("Densities < 0") |
|
830 |
- densities[densities>1] <- 1 |
|
831 |
- densities[densities<0] <- 0 |
|
832 |
- # Check if densities are 0 everywhere in some bins |
|
833 |
- check <- which(apply(densities, 1, sum) == 0) |
|
834 |
- if (length(check)>0) { |
|
835 |
- if (check[1]==1) { |
|
836 |
- densities[1,] <- rep(1e-10, ncol(densities)) |
|
837 |
- check <- check[-1] |
|
786 |
+ |
|
787 |
+ ## Calculate correlation matrix |
|
788 |
+ if (verbosity >=1) { |
|
789 |
+ ptm <- startTimedMessage("Computing inverse of correlation matrix...") |
|
838 | 790 |
} |
839 |
- for (icheck in check) { |
|
840 |
- densities[icheck,] <- densities[icheck-1,] |
|
791 |
+ correlationMatrix <- array(0, dim=c(num.models,num.models,num.comb.states), dimnames=list(strand=names(distributions), strand=names(distributions), comb.state=comb.states)) |
|
792 |
+ correlationMatrixInverse <- correlationMatrix |
|
793 |
+ determinant <- rep(0, num.comb.states) |
|
794 |
+ names(determinant) <- comb.states |
|
795 |
+ usestateTF <- rep(NA, num.comb.states) # TRUE, FALSE vector for usable states |
|
796 |
+ names(usestateTF) <- comb.states |
|
797 |
+ for (comb.state in comb.states) { |
|
798 |
+ state <- strsplit(comb.state, ' ')[[1]] |
|
799 |
+ mask <- which(comb.states.per.bin==comb.state) |
|
800 |
+ # Subselect z |
|
801 |
+ z.temp <- matrix(NA, ncol=num.models, nrow=length(mask)) |
|
802 |
+ for (istrand in 1:num.models) { |
|
803 |
+ z.temp[,istrand] <- z.per.bin[mask, istrand, state[istrand]] |
|
804 |
+ } |
|
805 |
+ temp <- tryCatch({ |
|
806 |
+ if (nrow(z.temp) > 1) { |
|
807 |
+ correlationMatrix[,,comb.state] <- cor(z.temp) |
|
808 |
+ determinant[comb.state] <- det( correlationMatrix[,,comb.state] ) |
|
809 |
+ correlationMatrixInverse[,,comb.state] <- solve(correlationMatrix[,,comb.state]) |
|
810 |
+ usestateTF[comb.state] <- TRUE |
|
811 |
+ } else { |
|
812 |
+ correlationMatrix[,,comb.state] <- diag(num.models) |
|
813 |
+ determinant[comb.state] <- det( correlationMatrix[,,comb.state] ) |
|
814 |
+ correlationMatrixInverse[,,comb.state] <- solve(correlationMatrix[,,comb.state]) |
|
815 |
+ usestateTF[comb.state] <- TRUE |
|
816 |
+ } |
|
817 |
+ 0 |
|
818 |
+ }, warning = function(war) { |
|
819 |
+ 1 |
|
820 |
+ }, error = function(err) { |
|
821 |
+ 1 |
|
822 |
+ }) |
|
823 |
+ if (temp!=0) { |
|
824 |
+ correlationMatrix[,,comb.state] <- diag(num.models) |
|
825 |
+ determinant[comb.state] <- det( correlationMatrix[,,comb.state] ) |
|
826 |
+ correlationMatrixInverse[,,comb.state] <- solve(correlationMatrix[,,comb.state]) |
|
827 |
+ usestateTF[comb.state] <- TRUE |
|
828 |
+ } |
|
829 |
+ } |
|
830 |
+ if (verbosity >=1) { |
|
831 |
+ stopTimedMessage(ptm) |
|
841 | 832 |
} |
842 |
- } |
|
843 |
- stopTimedMessage(ptm) |
|
844 |
- |
|
845 |
- ### Define cleanup behaviour ### |
|
846 |
- on.exit(.C("C_multivariate_cleanup", as.integer(num.comb.states), PACKAGE = 'AneuFinder')) |
|
847 |
- |
|
848 |
- ### Run the multivariate HMM |
|
849 |
- # Call the C function |
|
850 |
- hmm <- .C("C_multivariate_hmm", |
|
851 |
- densities = as.double(densities), # double* D |
|
852 |
- num.bins = as.integer(num.bins), # int* T |
|
853 |
- num.comb.states = as.integer(num.comb.states), # int* N |
|
854 |
- num.strands = as.integer(num.models), # int* Nmod |
|
855 |
- comb.states = as.integer(comb.states), # int* comb_states |
|
856 |
- num.iterations = as.integer(max.iter), # int* maxiter |
|
857 |
- time.sec = as.integer(max.time), # double* maxtime |
|
858 |
- loglik.delta = as.double(eps), # double* eps |
|
859 |
- states = integer(length=num.bins), # int* states |
|
860 |
- A = double(length=num.comb.states*num.comb.states), # double* A |
|
861 |
- proba = double(length=num.comb.states), # double* proba |
|
862 |
- loglik = double(length=1), # double* loglik |
|
863 |
- A.initial = as.vector(A.initial), # double* initial_A |
|
864 |
- proba.initial = as.vector(proba.initial), # double* initial_proba |
|
865 |
- use.initial.params = as.logical(use.initial), # bool* use_initial_params |
|
866 |
- num.threads = as.integer(num.threads), # int* num_threads |
|
867 |
- error = as.integer(0), # error handling |
|
868 |
- algorithm = as.integer(algorithm), # int* algorithm |
|
869 |
- verbosity = as.integer(verbosity), # int* verbosity |
|
870 |
- PACKAGE = 'AneuFinder' |
|
871 |
- ) |
|
872 |
- |
|
873 |
- ### Check convergence ### |
|
874 |
- war <- NULL |
|
875 |
- if (hmm$loglik.delta > eps) { |
|
876 |
- war <- warning(paste0("ID = ",ID,": HMM did not converge!\n")) |
|
877 |
- } |
|
878 |
- if (hmm$error == 1) { |
|
879 |
- warning(paste0("ID = ",ID,": A NaN occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library! The following factors are known to cause this error: 1) Your read counts contain very high numbers. Try again with a lower value for 'count.cutoff.quantile'. 2) Your library contains too few reads in each bin. 3) Your library contains reads for a different genome than it was aligned to.")) |
|
880 |
- } else if (hmm$error == 2) { |
|
881 |
- warning(paste0("ID = ",ID,": An error occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library")) |
|
882 |
- } |
|
883 |
- |
|
884 |
- ### Make return object ### |
|
885 |
- if (hmm$error == 0) { |
|
886 |
- result <- list() |
|
887 |
- class(result) <- class.bivariate.hmm |
|
888 |
- result$ID <- ID |
|
889 |
- ## Bin coordinates and states |
|
890 |
- result$bins <- binned.data |
|
891 |
- result$bins$state <- comb.states[hmm$states] |
|
892 |
- # Get states as factors in data.frame |
|
893 |
- matrix.states <- matrix(unlist(strsplit(as.character(result$bins$state), split=' ')), byrow=TRUE, ncol=num.models, dimnames=list(bin=1:num.bins, strand=names(distributions))) |
|
894 |
- names <- c('mstate','pstate') |
|
895 |
- for (i1 in 1:num.models) { |
|
896 |
- mcols(result$bins)[names[i1]] <- factor(matrix.states[,i1], levels=uni.states) |
|
833 |
+ |
|
834 |
+ # Use only states with valid correlationMatrixInverse (all states are valid in this version) |
|
835 |
+ correlationMatrix = correlationMatrix[,,usestateTF] |
|
836 |
+ correlationMatrixInverse = correlationMatrixInverse[,,usestateTF] |
|
837 |
+ comb.states = comb.states[usestateTF] |
|
838 |
+ comb.states <- droplevels(comb.states) |
|
839 |
+ determinant = determinant[usestateTF] |
|
840 |
+ num.comb.states <- length(comb.states) |
|
841 |
+ |
|
842 |
+ ### Calculate multivariate densities for each state ### |
|
843 |
+ if (verbosity >=1) { |
|
844 |
+ ptm <- startTimedMessage("Calculating multivariate densities...") |
|
897 | 845 |
} |
898 |
- ## Segmentation |
|
899 |
- ptm <- startTimedMessage("Making segmentation ...") |
|
900 |
- suppressMessages( |
|
901 |
- result$segments <- as(collapseBins(as.data.frame(result$bins), column2collapseBy='state', columns2drop='width', columns2average=c('counts','mcounts','pcounts')), 'GRanges') |
|
902 |
- ) |
|
903 |
- seqlevels(result$segments) <- seqlevels(result$bins) # correct order from as() |
|
904 |
- seqlengths(result$segments) <- seqlengths(result$bins)[names(seqlengths(result$segments))] |
|
905 |
- stopTimedMessage(ptm) |
|
906 |
- ## CNV state for both strands combined |
|
907 |
- getnumbers <- function(x) { |
|
908 |
- x <- sub("zero-inflation", "0-somy", x) |
|
909 |
- x <- as.numeric(sub("-somy", "", x)) |
|
846 |
+ densities <- matrix(1, ncol=num.comb.states, nrow=num.bins, dimnames=list(bin=1:num.bins, comb.state=comb.states)) |
|
847 |
+ for (comb.state in comb.states) { |
|
848 |
+ istate <- which(comb.state==comb.states) |
|
849 |
+ state <- strsplit(comb.state, ' ')[[1]] |
|
850 |
+ z.temp <- matrix(NA, ncol=num.models, nrow=num.bins) |
|
851 |
+ product <- 1 |
|
852 |
+ for (istrand in 1:num.models) { |
|
853 |
+ z.temp[,istrand] <- z.per.bin[, istrand, state[istrand]] |
|
854 |
+ if (distributions[[istrand]][state[istrand],'type'] == 'dnbinom') { |
|
855 |
+ size <- distributions[[istrand]][state[istrand],'size'] |
|
856 |
+ prob <- distributions[[istrand]][state[istrand],'prob'] |
|
857 |
+ product <- product * stats::dnbinom(counts[,istrand], size, prob) |
|
858 |
+ } else if (distributions[[istrand]][state[istrand],'type'] == 'dgeom') { |
|
859 |
+ prob <- distributions[[istrand]][state[istrand],'prob'] |
|
860 |
+ product <- product * stats::dgeom(counts[,istrand], prob) |
|
861 |
+ } else if (distributions[[istrand]][state[istrand],'type'] == 'delta') { |
|
862 |
+ product <- product * ifelse(counts[,istrand]==0, 1, 0) |
|
863 |
+ } |
|
864 |
+ } |
|
865 |
+ exponent <- -0.5 * apply( ( z.temp %*% (correlationMatrixInverse[ , , istate] - diag(num.models)) ) * z.temp, 1, sum) |
|
866 |
+ exponent[is.nan(exponent)] <- 0 |
|
867 |
+ densities[,istate] <- product * determinant[istate]^(-0.5) * exp( exponent ) |
|
868 |
+ } |
|
869 |
+ # Check if densities are > 1 |
|
870 |
+ # if (any(densities>1)) stop("Densities > 1") |
|
871 |
+ # if (any(densities<0)) stop("Densities < 0") |
|
872 |
+ densities[densities>1] <- 1 |
|
873 |
+ densities[densities<0] <- 0 |
|
874 |
+ # Check if densities are 0 everywhere in some bins |
|
875 |
+ check <- which(apply(densities, 1, sum) == 0) |
|
876 |
+ if (length(check)>0) { |
|
877 |
+ if (check[1]==1) { |
|
878 |
+ densities[1,] <- rep(1e-10, ncol(densities)) |
|
879 |
+ check <- check[-1] |
|
880 |
+ } |
|
881 |
+ for (icheck in check) { |
|
882 |
+ densities[icheck,] <- densities[icheck-1,] |
|
883 |
+ } |
|
884 |
+ } |
|
885 |
+ if (verbosity >=1) { |
|
886 |
+ stopTimedMessage(ptm) |
|
910 | 887 |
} |
911 |
- inistates <- suppressWarnings( initializeStates(unique(c(states, paste0(sort(unique(getnumbers(result$bins$mstate) + getnumbers(result$bins$pstate))),"-somy")))) ) |
|
912 |
- multiplicity <- inistates$multiplicity |
|
913 |
- state.labels <- inistates$states |
|
914 |
- # Bins |
|
915 |
- state <- multiplicity[as.character(result$bins$mstate)] + multiplicity[as.character(result$bins$pstate)] |
|
916 |
- # state[state>max(multiplicity)] <- max(multiplicity) |
|
917 |
- multiplicity.inverse <- names(multiplicity) |
|
918 |
- names(multiplicity.inverse) <- multiplicity |
|
919 |
- state <- multiplicity.inverse[as.character(state)] |
|
920 |
- state[(result$bins$mstate=='0-somy' | result$bins$pstate=='0-somy') & state=='zero-inflation'] <- '0-somy' |
|
921 |
- result$bins$state <- factor(state, levels=names(multiplicity)) |
|
922 |
- result$bins$copy.number <- multiplicity[as.character(result$bins$state)] |
|
923 |
- result$bins$mcopy.number <- multiplicity[as.character(result$bins$mstate)] |
|
924 |
- result$bins$pcopy.number <- multiplicity[as.character(result$bins$pstate)] |
|
925 |
- # Segments |
|
926 |
- str <- strsplit(as.character(result$segments$state),' ') |
|
927 |
- result$segments$mstate <- factor(unlist(lapply(str, '[[', 1)), levels=state.labels) |
|
928 |
- result$segments$pstate <- factor(unlist(lapply(str, '[[', 2)), levels=state.labels) |
|
929 |
- state <- multiplicity[result$segments$mstate] + multiplicity[result$segments$pstate] |
|
930 |
- state[state>max(multiplicity)] <- max(multiplicity) |
|
931 |
- multiplicity.inverse <- names(multiplicity) |
|
932 |
- names(multiplicity.inverse) <- multiplicity |
|
933 |
- state <- multiplicity.inverse[as.character(state)] |
|
934 |
- state[(result$segments$mstate=='0-somy' | result$segments$pstate=='0-somy') & state=='zero-inflation'] <- '0-somy' |
|
935 |
- result$segments$state <- factor(state, levels=names(multiplicity)) |
|
936 |
- result$segments$copy.number <- multiplicity[as.character(result$segments$state)] |
|
937 |
- result$segments$mcopy.number <- multiplicity[as.character(result$segments$mstate)] |
|
938 |
- result$segments$pcopy.number <- multiplicity[as.character(result$segments$pstate)] |
|
939 |
- ## Parameters |
|
940 |
- # Weights |
|
941 |
- tstates <- table(result$bins$state) |
|
942 |
- result$weights <- tstates/sum(tstates) |
|
943 |
- # Transition matrices |
|
944 |
- result$transitionProbs <- matrix(hmm$A, ncol=num.comb.states) |
|
945 |
- colnames(result$transitionProbs) <- comb.states |
|
946 |
- rownames(result$transitionProbs) <- comb.states |
|
947 |
- result$transitionProbs.initial <- matrix(hmm$A.initial, ncol=num.comb.states) |
|
948 |
- colnames(result$transitionProbs.initial) <- comb.states |
|
949 |
- rownames(result$transitionProbs.initial) <- comb.states |
|
950 |
- # Initial probs |
|
951 |
- result$startProbs <- hmm$proba |
|
952 |
- names(result$startProbs) <- comb.states |
|
953 |
- result$startProbs.initial <- hmm$proba.initial |
|
954 |
- names(result$startProbs.initial) <- comb.states |
|
955 |
- # Distributions |
|
956 |
- result$distributions <- distributions |
|
957 |
- ## Convergence info |
|
958 |
- convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec) |
|
959 |
- result$convergenceInfo <- convergenceInfo |
|
960 |
- ## Quality info |
|
961 |
- result$qualityInfo <- as.list(getQC(result)) |
|
962 |
- ## Univariate infos |
|
963 |
- univariateParams <- list(transitionProbs=uni.transitionProbs, startProbs=uni.startProbs, distributions=distributions[[1]], weights=uni.weights) |
|
964 |
- result$univariateParams <- univariateParams |
|
965 |
- } else if (hmm$error == 1) { |
|
966 |
- warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": A NaN occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library! The following factors are known to cause this error: 1) Your read counts contain very high numbers. Try again with a lower value for 'count.cutoff.quantile'. 2) Your library contains too few reads in each bin. 3) Your library contains reads for a different genome than it was aligned to.")) |
|
967 |
- } else if (hmm$error == 2) { |
|
968 |
- warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": An error occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library")) |
|
969 |
- } |
|
970 |
- |
|
971 |
- ## Issue warnings |
|
972 |
- result$warnings <- warlist |
|
888 |
+ |
|
889 |
+ ### Define cleanup behaviour ### |
|
890 |
+ on.exit(.C("C_multivariate_cleanup", as.integer(num.comb.states), PACKAGE = 'AneuFinder')) |
|
891 |
+ |
|
892 |
+ ### Run the multivariate HMM |
|
893 |
+ # Call the C function |
|
894 |
+ hmm <- .C("C_multivariate_hmm", |
|
895 |
+ densities = as.double(densities), # double* D |
|
896 |
+ num.bins = as.integer(num.bins), # int* T |
|
897 |
+ num.comb.states = as.integer(num.comb.states), # int* N |
|
898 |
+ num.strands = as.integer(num.models), # int* Nmod |
|
899 |
+ comb.states = as.integer(comb.states), # int* comb_states |
|
900 |
+ num.iterations = as.integer(max.iter), # int* maxiter |
|
901 |
+ time.sec = as.integer(max.time), # double* maxtime |
|
902 |
+ loglik.delta = as.double(eps), # double* eps |
|
903 |
+ maxPosterior = double(length=num.bins), # double* maxPosterior |
|
904 |
+ states = integer(length=num.bins), # int* states |
|
905 |
+ A = double(length=num.comb.states*num.comb.states), # double* A |
|
906 |
+ proba = double(length=num.comb.states), # double* proba |
|
907 |
+ loglik = double(length=1), # double* loglik |
|
908 |
+ A.initial = as.vector(A.initial), # double* initial_A |
|
909 |
+ proba.initial = as.vector(proba.initial), # double* initial_proba |
|
910 |
+ use.initial.params = as.logical(use.initial), # bool* use_initial_params |
|
911 |
+ num.threads = as.integer(num.threads), # int* num_threads |
|
912 |
+ error = as.integer(0), # error handling |
|
913 |
+ algorithm = as.integer(algorithm), # int* algorithm |
|
914 |
+ verbosity = as.integer(verbosity), # int* verbosity |
|
915 |
+ PACKAGE = 'AneuFinder' |
|
916 |
+ ) |
|
917 |
+ |
|
918 |
+ ### Check convergence ### |
|
919 |
+ war <- NULL |
|
920 |
+ if (hmm$loglik.delta > eps) { |
|
921 |
+ war <- warning(paste0("ID = ",ID,": HMM did not converge!\n")) |
|
922 |
+ } |
|
923 |
+ if (hmm$error == 1) { |
|
924 |
+ warning(paste0("ID = ",ID,": A NaN occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library! The following factors are known to cause this error: 1) Your read counts contain very high numbers. Try again with a lower value for 'count.cutoff.quantile'. 2) Your library contains too few reads in each bin. 3) Your library contains reads for a different genome than it was aligned to.")) |
|
925 |
+ } else if (hmm$error == 2) { |
|
926 |
+ warning(paste0("ID = ",ID,": An error occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library")) |
|
927 |
+ } |
|
973 | 928 |
|
929 |
+ if (istep == 1) { |
|
930 |
+ ### Make return object ### |
|
931 |
+ if (hmm$error == 0) { |
|
932 |
+ result <- list() |
|
933 |
+ class(result) <- class.bivariate.hmm |
|
934 |
+ result$ID <- ID |
|
935 |
+ ## Bin coordinates and states |
|
936 |
+ result$bins <- binned.data |
|
937 |
+ result$bins$state <- comb.states[hmm$states] |
|
938 |
+ # Get states as factors in data.frame |
|
939 |
+ matrix.states <- matrix(unlist(strsplit(as.character(result$bins$state), split=' ')), byrow=TRUE, ncol=num.models, dimnames=list(bin=1:num.bins, strand=names(distributions))) |
|
940 |
+ names <- c('mstate','pstate') |
|
941 |
+ for (i1 in 1:num.models) { |
|
942 |
+ mcols(result$bins)[names[i1]] <- factor(matrix.states[,i1], levels=uni.states) |
|
943 |
+ } |
|
944 |
+ ## Segmentation |
|
945 |
+ ptm <- startTimedMessage("Making segmentation ...") |
|
946 |
+ suppressMessages( |
|
947 |
+ result$segments <- as(collapseBins(as.data.frame(result$bins), column2collapseBy='state', columns2drop='width', columns2average=c('counts','mcounts','pcounts')), 'GRanges') |
|
948 |
+ ) |
|
949 |
+ seqlevels(result$segments) <- seqlevels(result$bins) # correct order from as() |
|
950 |
+ seqlengths(result$segments) <- seqlengths(result$bins)[names(seqlengths(result$segments))] |
|
951 |
+ stopTimedMessage(ptm) |
|
952 |
+ ## CNV state for both strands combined |
|
953 |
+ getnumbers <- function(x) { |
|
954 |
+ x <- sub("zero-inflation", "0-somy", x) |
|
955 |
+ x <- as.numeric(sub("-somy", "", x)) |
|
956 |
+ } |
|
957 |
+ inistates <- suppressWarnings( initializeStates(unique(c(states, paste0(sort(unique(getnumbers(result$bins$mstate) + getnumbers(result$bins$pstate))),"-somy")))) ) |
|
958 |
+ multiplicity <- inistates$multiplicity |
|
959 |
+ state.labels <- inistates$states |
|
960 |
+ # Bins |
|
961 |
+ state <- multiplicity[as.character(result$bins$mstate)] + multiplicity[as.character(result$bins$pstate)] |
|
962 |
+ # state[state>max(multiplicity)] <- max(multiplicity) |
|
963 |
+ multiplicity.inverse <- names(multiplicity) |
|
964 |
+ names(multiplicity.inverse) <- multiplicity |
|
965 |
+ state <- multiplicity.inverse[as.character(state)] |
|
966 |
+ state[(result$bins$mstate=='0-somy' | result$bins$pstate=='0-somy') & state=='zero-inflation'] <- '0-somy' |
|
967 |
+ result$bins$state <- factor(state, levels=names(multiplicity)) |
|
968 |
+ result$bins$copy.number <- multiplicity[as.character(result$bins$state)] |
|
969 |
+ result$bins$mcopy.number <- multiplicity[as.character(result$bins$mstate)] |
|
970 |
+ result$bins$pcopy.number <- multiplicity[as.character(result$bins$pstate)] |
|
971 |
+ # Segments |
|
972 |
+ str <- strsplit(as.character(result$segments$state),' ') |
|
973 |
+ result$segments$mstate <- factor(unlist(lapply(str, '[[', 1)), levels=state.labels) |
|
974 |
+ result$segments$pstate <- factor(unlist(lapply(str, '[[', 2)), levels=state.labels) |
|
975 |
+ state <- multiplicity[result$segments$mstate] + multiplicity[result$segments$pstate] |
|
976 |
+ state[state>max(multiplicity)] <- max(multiplicity) |
|
977 |
+ multiplicity.inverse <- names(multiplicity) |
|
978 |
+ names(multiplicity.inverse) <- multiplicity |
|
979 |
+ state <- multiplicity.inverse[as.character(state)] |
|
980 |
+ state[(result$segments$mstate=='0-somy' | result$segments$pstate=='0-somy') & state=='zero-inflation'] <- '0-somy' |
|
981 |
+ result$segments$state <- factor(state, levels=names(multiplicity)) |
|
982 |
+ result$segments$copy.number <- multiplicity[as.character(result$segments$state)] |
|
983 |
+ result$segments$mcopy.number <- multiplicity[as.character(result$segments$mstate)] |
|
984 |
+ result$segments$pcopy.number <- multiplicity[as.character(result$segments$pstate)] |
|
985 |
+ ## Parameters |
|
986 |
+ # Weights |
|
987 |
+ tstates <- table(result$bins$state) |
|
988 |
+ result$weights <- tstates/sum(tstates) |
|
989 |
+ # Transition matrices |
|
990 |
+ result$transitionProbs <- matrix(hmm$A, ncol=num.comb.states) |
|
991 |
+ colnames(result$transitionProbs) <- comb.states |
|
992 |
+ rownames(result$transitionProbs) <- comb.states |
|
993 |
+ result$transitionProbs.initial <- matrix(hmm$A.initial, ncol=num.comb.states) |
|
994 |
+ colnames(result$transitionProbs.initial) <- comb.states |
|
995 |
+ rownames(result$transitionProbs.initial) <- comb.states |
|
996 |
+ # Initial probs |
|
997 |
+ result$startProbs <- hmm$proba |
|
998 |
+ names(result$startProbs) <- comb.states |
|
999 |
+ result$startProbs.initial <- hmm$proba.initial |
|
1000 |
+ names(result$startProbs.initial) <- comb.states |
|
1001 |
+ # Distributions |
|
1002 |
+ result$distributions <- distributions |
|
1003 |
+ ## Convergence info |
|
1004 |
+ convergenceInfo <- list(eps=eps, loglik=hmm$loglik, loglik.delta=hmm$loglik.delta, num.iterations=hmm$num.iterations, time.sec=hmm$time.sec) |
|
1005 |
+ result$convergenceInfo <- convergenceInfo |
|
1006 |
+ ## Quality info |
|
1007 |
+ result$qualityInfo <- as.list(getQC(result)) |
|
1008 |
+ ## Univariate infos |
|
1009 |
+ univariateParams <- list(transitionProbs=uni.transitionProbs, startProbs=uni.startProbs, distributions=distributions[[1]], weights=uni.weights) |
|
1010 |
+ result$univariateParams <- univariateParams |
|
1011 |
+ } else if (hmm$error == 1) { |
|
1012 |
+ warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": A NaN occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library! The following factors are known to cause this error: 1) Your read counts contain very high numbers. Try again with a lower value for 'count.cutoff.quantile'. 2) Your library contains too few reads in each bin. 3) Your library contains reads for a different genome than it was aligned to.")) |
|
1013 |
+ } else if (hmm$error == 2) { |
|
1014 |
+ warlist[[length(warlist)+1]] <- warning(paste0("ID = ",ID,": An error occurred during the Baum-Welch! Parameter estimation terminated prematurely. Check your library")) |
|
1015 |
+ } |
|
1016 |
+ if (hmm$error != 0) { |
|
1017 |
+ ## Issue warnings |
|
1018 |
+ result$warnings <- warlist |
|
1019 |
+ return(result) |
|
1020 |
+ } |
|
1021 |
+ } # if (istep==1) |
|
1022 |
+ |
|
1023 |
+ if (istep == 1) { ptm <- startTimedMessage("Collecting counts and posteriors ...") } |
|
1024 |
+ |
|
1025 |
+ ## Inflate posteriors, states, counts to new offset |
|
1026 |
+ ind <- findOverlaps(stepbins, binned.data) |
|
1027 |
+ astates.step[ind@from, 'currentOffset'] <- hmm$states[ind@to] |
|
1028 |
+ amaxPosterior.step[ind@from, 'currentOffset'] <- hmm$maxPosterior[ind@to] |
|
1029 |
+ |
|
1030 |
+ ## Find offset that maximizes the posteriors for each bin |
|
1031 |
+ ##-- Start stuff to call C code |
|
1032 |
+ # Work with changing dimensions to avoid copies being made |
|
1033 |
+ dim_amaxPosterior.step <- dim(amaxPosterior.step) |
|
1034 |
+ dimnames_amaxPosterior.step <- dimnames(amaxPosterior.step) |
|
1035 |
+ dim(amaxPosterior.step) <- NULL |
|
1036 |
+ z <- .C("C_array2D_which_max", |
|
1037 |
+ array2D = amaxPosterior.step, |
|
1038 |
+ dim = as.integer(dim_amaxPosterior.step), |
|
1039 |
+ ind_max = integer(dim_amaxPosterior.step[1]), |
|
1040 |
+ value_max = double(dim_amaxPosterior.step[1])) |
|
1041 |
+ dim(amaxPosterior.step) <- dim_amaxPosterior.step |
|
1042 |
+ dimnames(amaxPosterior.step) <- dimnames_amaxPosterior.step |
|
1043 |
+ ind <- z$ind_max |
|
1044 |
+ ##-- End stuff to call C code |
|
1045 |
+ for (i1 in 1:2) { |
|
1046 |
+ mask <- ind == i1 |
|
1047 |
+ astates.step[mask, 'previousOffsets'] <- astates.step[mask,i1, drop=FALSE] |
|
1048 |
+ amaxPosterior.step[mask, 'previousOffsets'] <- amaxPosterior.step[mask,i1, drop=FALSE] |
|
1049 |
+ } |
|
1050 |
+ if (istep == 1) { stopTimedMessage(ptm) } |
|
1051 |
+ |
|
1052 |
+ if (istep > 1) { |
|
1053 |
+ stopTimedMessage(ptm.offset) |
|
1054 |
+ } |
|
1055 |
+ |
|
1056 |
+ rm(hmm, ind) |
|
1057 |
+ } # loop over offsets |
|
1058 |
+ |
|
974 | 1059 |
## Return results |
975 | 1060 |
return(result) |
976 | 1061 |
} |
... | ... |
@@ -1183,7 +1268,11 @@ DNAcopy.findCNVs <- function(binned.data, ID=NULL, CNgrid.start=1.5, count.cutof |
1183 | 1268 |
biDNAcopy.findCNVs <- function(binned.data, ID=NULL, CNgrid.start=0.5, count.cutoff.quantile=0.999) { |
1184 | 1269 |
|
1185 | 1270 |
## Intercept user input |
1186 |
- binned.data <- loadFromFiles(binned.data, check.class='GRanges')[[1]] |
|
1271 |
+ binned.data <- loadFromFiles(binned.data, check.class=c('GRanges','GRangesList'))[[1]] |
|
1272 |
+ if (class(binned.data) == 'GRangesList') { |
|
1273 |
+ binned.data.list <- binned.data |
|
1274 |
+ binned.data <- binned.data.list[[1]] |
|
1275 |
+ } |
|
1187 | 1276 |
if (is.null(ID)) { |
1188 | 1277 |
ID <- attr(binned.data, 'ID') |
1189 | 1278 |
} |
... | ... |
@@ -28,10 +28,7 @@ |
28 | 28 |
findCNVs.strandseq <- function(binned.data, ID=NULL, eps=0.1, init="standard", max.time=-1, max.iter=1000, num.trials=5, eps.try=10*eps, num.threads=1, count.cutoff.quantile=0.999, strand='*', states=c('zero-inflation',paste0(0:10,'-somy')), most.frequent.state="1-somy", method='HMM', algorithm="EM", initial.params=NULL) { |
29 | 29 |
|
30 | 30 |
## Intercept user input |
31 |
- if (class(binned.data) != 'GRanges') { |
|
32 |
- binned.data <- get(load(binned.data)) |
|
33 |
- if (class(binned.data) != 'GRanges') stop("argument 'binned.data' expects a GRanges with meta-column 'counts' or a file that contains such an object") |
|
34 |
- } |
|
31 |
+ binned.data <- loadFromFiles(binned.data, check.class=c('GRanges','GRangesList'))[[1]] |
|
35 | 32 |
if (is.null(ID)) { |
36 | 33 |
ID <- attr(binned.data, 'ID') |
37 | 34 |
} |
... | ... |
@@ -91,6 +91,31 @@ plot.GRanges <- function(x, type='profile', ...) { |
91 | 91 |
} |
92 | 92 |
} |
93 | 93 |
|
94 |
+#' Plotting function for binned read counts (list) |
|
95 |
+#' |
|
96 |
+#' Make plots for binned read counts (list) from \code{\link{binned.data}}. |
|
97 |
+#' |
|
98 |
+#' @param x A \code{\link{GRangesList}} object with binned read counts. |
|
99 |
+#' @param type Type of the plot, one of \code{c('profile', 'histogram', 'karyogram')}. You can also specify the type with an integer number. |
|
100 |
+#' \describe{ |
|
101 |
+#' \item{\code{karyogram}}{A karyogram-like chromosome overview with read counts.} |
|
102 |
+#' \item{\code{histogram}}{A histogram of read counts.} |
|
103 |
+#' \item{\code{profile}}{An profile with read counts.} |
|
104 |
+#' } |
|
105 |
+#' @param ... Additional arguments for the different plot types. |
|
106 |
+#' @return A \code{\link[ggplot2:ggplot]{ggplot}} object. |
|
107 |
+#' @method plot GRanges |
|
108 |
+#' @export |
|
109 |
+plot.GRangesList <- function(x, type='profile', ...) { |
|
110 |
+ if (type == 'karyogram' | type==3) { |
|
111 |
+ plotKaryogram(x, ...) |
|
112 |
+ } else if (type == 'histogram' | type==2) { |
|
113 |
+ plotHistogram(x, ...) |
|
114 |
+ } else if (type == 'profile' | type==1) { |
|
115 |
+ plotProfile(x, ...) |
|
116 |
+ } |
|
117 |
+} |
|
118 |
+ |
|
94 | 119 |
#' Plotting function for \code{\link{aneuHMM}} objects |
95 | 120 |
#' |
96 | 121 |
#' Make different types of plots for \code{\link{aneuHMM}} objects. |
... | ... |
@@ -215,7 +240,7 @@ plotBivariateHistograms <- function(bihmm) { |
215 | 240 |
uni.hmm$distributions <- bihmm$distributions[[strand]] |
216 | 241 |
uni.hmm$qualityInfo <- bihmm$qualityInfo |
217 | 242 |
class(uni.hmm) <- class.univariate.hmm |
218 |
- ggplts <- plotHistogram(uni.hmm, strand='*') |
|
243 |
+ ggplts <- plotHistogram(uni.hmm) |
|
219 | 244 |
|
220 | 245 |
return(ggplts) |
221 | 246 |
|
... | ... |
@@ -233,19 +258,22 @@ plotBivariateHistograms <- function(bihmm) { |
233 | 258 |
#' @importFrom stats dgeom dnbinom dpois reshape |
234 | 259 |
plotHistogram <- function(model) { |
235 | 260 |
|
236 |
- model <- suppressMessages( loadFromFiles(model, check.class=c('GRanges', class.univariate.hmm))[[1]] ) |
|
261 |
+ model <- suppressMessages( loadFromFiles(model, check.class=c('GRanges', 'GRangesList', class.univariate.hmm))[[1]] ) |
|
237 | 262 |
if (class(model) == 'GRanges') { |
238 | 263 |
bins <- model |
239 |
- countbins <- model |
|
264 |
+ bincounts <- model |
|
265 |
+ } else if (class(model) == 'GRangesList') { |
|
266 |
+ bins <- model[[1]] |
|
267 |
+ bincounts <- model[[1]] |
|
240 | 268 |
} else if (class(model) == class.univariate.hmm) { |
241 | 269 |
bins <- model$bins |
242 | 270 |
if (!is.null(model$bins$counts)) { |
243 |
- countbins <- model$bins |
|
244 |
- } else if (!is.null(model$binned.data[[1]]$counts)) { |
|
245 |
- countbins <- model$binned.data[[1]] |
|
271 |
+ bincounts <- model$bins |
|
272 |
+ } else if (!is.null(model$bincounts[[1]]$counts)) { |
|
273 |
+ bincounts <- model$bincounts[[1]] |
|
246 | 274 |
} |
247 | 275 |
} |
248 |
- counts <- countbins$counts |
|
276 |
+ counts <- bincounts$counts |
|
249 | 277 |
|
250 | 278 |
states <- bins$state |
251 | 279 |
if (!is.null(model$weights)) { |
... | ... |
@@ -361,8 +389,8 @@ plot.karyogram <- function(model, both.strands=FALSE, plot.SCE=FALSE, file=NULL) |
361 | 389 |
## Convert to GRanges |
362 | 390 |
if (!is.null(model$bins$counts)) { |
363 | 391 |
bins <- model$bins |
364 |
- } else if (!is.null(model$binned.data[[1]]$counts)) { |
|
365 |
- bins <- model$binned.data[[1]] |
|
392 |
+ } else if (!is.null(model$bincounts[[1]]$counts)) { |
|
393 |
+ bins <- model$bincounts[[1]] |
|
366 | 394 |
bins$state <- model$bins$state[findOverlaps(bins, model$bins, select='first')] |
367 | 395 |
} |
368 | 396 |
bins.split <- split(bins, seqnames(bins)) |
... | ... |
@@ -856,6 +884,14 @@ plotProfile <- function(model, both.strands=FALSE, plot.SCE=TRUE, file=NULL, nor |
856 | 884 |
model$bins <- binned.data |
857 | 885 |
model$qualityInfo <- list(entropy=qc.entropy(binned.data$counts), spikiness=qc.spikiness(binned.data$counts), complexity=attr(binned.data, 'complexity')) |
858 | 886 |
plot.profile(model, both.strands=both.strands, plot.SCE=FALSE, file=file) |
887 |
+ } else if (class(model)=='GRangesList') { |
|
888 |
+ binned.data <- model[[1]] |
|
889 |
+ model <- list() |
|
890 |
+ class(model) <- class.univariate.hmm |
|
891 |
+ model$ID <- '' |
|
892 |
+ model$bins <- binned.data |
|
893 |
+ model$qualityInfo <- list(entropy=qc.entropy(binned.data$counts), spikiness=qc.spikiness(binned.data$counts), complexity=attr(binned.data, 'complexity')) |
|
894 |
+ plot.profile(model, both.strands=both.strands, plot.SCE=FALSE, file=file) |
|
859 | 895 |
} else if (class(model)==class.univariate.hmm) { |
860 | 896 |
plot.profile(model, both.strands=FALSE, plot.SCE=FALSE, file=file, normalize.counts = normalize.counts) |
861 | 897 |
} else if (class(model)==class.bivariate.hmm) { |
... | ... |
@@ -872,8 +908,8 @@ plot.profile <- function(model, both.strands=FALSE, plot.SCE=TRUE, file=NULL, no |
872 | 908 |
## Convert to GRanges |
873 | 909 |
if (!is.null(model$bins$counts)) { |
874 | 910 |
bins <- model$bins |
875 |
- } else if (!is.null(model$binned.data[[1]]$counts)) { |
|
876 |
- bins <- model$binned.data[[1]] |
|
911 |
+ } else if (!is.null(model$bincounts[[1]]$counts)) { |
|
912 |
+ bins <- model$bincounts[[1]] |
|
877 | 913 |
} |
878 | 914 |
## Get breakpoint coordinates |
879 | 915 |
if (is.null(model$breakpoints) & plot.SCE) { |
... | ... |
@@ -56,8 +56,8 @@ qc.bhattacharyya <- function(hmm) { |
56 | 56 |
} |
57 | 57 |
if (!is.null(hmm$bins$counts)) { |
58 | 58 |
counts <- hmm$bins$counts |
59 |
- } else if (!is.null(hmm$binned.data[[1]]$counts)) { |
|
60 |
- counts <- hmm$binned.data[[1]]$counts |
|
59 |
+ } else if (!is.null(hmm$bincounts[[1]]$counts)) { |
|
60 |
+ counts <- hmm$bincounts[[1]]$counts |
|
61 | 61 |
} |
62 | 62 |
x <- 0:max(max(counts),500) |
63 | 63 |
if (!is.na(distr['1-somy','size']) & !is.na(distr['2-somy','size'])) { |
... | ... |
@@ -156,8 +156,8 @@ getQC <- function(models) { |
156 | 156 |
bins <- model$bins |
157 | 157 |
if (!is.null(model$bins$counts)) { |
158 | 158 |
counts <- model$bins$counts |
159 |
- } else if (!is.null(model$binned.data[[1]]$counts)) { |
|
160 |
- counts <- model$binned.data[[1]]$counts |
|
159 |
+ } else if (!is.null(model$bincounts[[1]]$counts)) { |
|
160 |
+ counts <- model$bincounts[[1]]$counts |
|
161 | 161 |
} |
162 | 162 |
qframe[[i1]] <- data.frame( total.read.count=sum(counts), |
163 | 163 |
avg.binsize=mean(width(bins)), |
164 | 164 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,27 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/plotting.R |
|
3 |
+\name{plot.GRangesList} |
|
4 |
+\alias{plot.GRangesList} |
|
5 |
+\title{Plotting function for binned read counts (list)} |
|
6 |
+\usage{ |
|
7 |
+\method{plot}{GRanges}(x, type = "profile", ...) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{x}{A \code{\link{GRangesList}} object with binned read counts.} |
|
11 |
+ |
|
12 |
+\item{type}{Type of the plot, one of \code{c('profile', 'histogram', 'karyogram')}. You can also specify the type with an integer number. |
|
13 |
+\describe{ |
|
14 |
+ \item{\code{karyogram}}{A karyogram-like chromosome overview with read counts.} |
|
15 |
+ \item{\code{histogram}}{A histogram of read counts.} |
|
16 |
+ \item{\code{profile}}{An profile with read counts.} |
|
17 |
+}} |
|
18 |
+ |
|
19 |
+\item{...}{Additional arguments for the different plot types.} |
|
20 |
+} |
|
21 |
+\value{ |
|
22 |
+A \code{\link[ggplot2:ggplot]{ggplot}} object. |
|
23 |
+} |
|
24 |
+\description{ |
|
25 |
+Make plots for binned read counts (list) from \code{\link{binned.data}}. |
|
26 |
+} |
|
27 |
+ |
... | ... |
@@ -214,7 +214,7 @@ void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, dou |
214 | 214 |
// ===================================================================================================================================================== |
215 | 215 |
// This function takes parameters from R, creates a multivariate HMM object, runs the EM and returns the result to R. |
216 | 216 |
// ===================================================================================================================================================== |
217 |
-void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity) |
|
217 |
+void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity) |
|
218 | 218 |
{ |
219 | 219 |
|
220 | 220 |
// Define logging level {"ERROR", "WARNING", "INFO", "ITERATION", "DEBUG", "DEBUG1", "DEBUG2", "DEBUG3", "DEBUG4"} |
... | ... |
@@ -330,6 +330,7 @@ void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, in |
330 | 330 |
} |
331 | 331 |
ind_max = std::distance(posterior_per_t.begin(), std::max_element(posterior_per_t.begin(), posterior_per_t.end())); |
332 | 332 |
states[t] = comb_states[ind_max]; |
333 |
+ maxPosterior[t] = posterior_per_t[ind_max]; |
|
333 | 334 |
} |
334 | 335 |
|
335 | 336 |
//FILE_LOG(logDEBUG1) << "Return parameters"; |
... | ... |
@@ -16,7 +16,7 @@ extern "C" |
16 | 16 |
void univariate_hmm(int* O, int* T, int* N, int* state_labels, double* size, double* prob, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* weights, int* distr_type, double* initial_size, double* initial_prob, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* read_cutoff, int* algorithm, int* verbosity); |
17 | 17 |
|
18 | 18 |
extern "C" |
19 |
-void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity); |
|
19 |
+void multivariate_hmm(double* D, int* T, int* N, int *Nmod, int* comb_states, int* maxiter, int* maxtime, double* eps, double* maxPosterior, int* states, double* A, double* proba, double* loglik, double* initial_A, double* initial_proba, bool* use_initial_params, int* num_threads, int* error, int* algorithm, int* verbosity); |
|
20 | 20 |
|
21 | 21 |
extern "C" |
22 | 22 |
void univariate_cleanup(); |
... | ... |
@@ -4,13 +4,13 @@ |
4 | 4 |
|
5 | 5 |
|
6 | 6 |
R_NativePrimitiveArgType arg1[] = {INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP}; |
7 |
-R_NativePrimitiveArgType arg2[] = {REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP}; |
|
7 |
+R_NativePrimitiveArgType arg2[] = {REALSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, INTSXP, REALSXP, REALSXP, REALSXP, REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP}; |
|
8 | 8 |
R_NativePrimitiveArgType arg4[] = {INTSXP}; |
9 | 9 |
R_NativePrimitiveArgType arg5[] = {REALSXP, INTSXP, INTSXP, REALSXP}; |
10 | 10 |
|
11 | 11 |
static const R_CMethodDef CEntries[] = { |
12 | 12 |
{"C_univariate_hmm", (DL_FUNC) &univariate_hmm, 26, arg1}, |
13 |
- {"C_multivariate_hmm", (DL_FUNC) &multivariate_hmm, 19, arg2}, |
|
13 |
+ {"C_multivariate_hmm", (DL_FUNC) &multivariate_hmm, 20, arg2}, |
|
14 | 14 |
{"C_univariate_cleanup", (DL_FUNC) &univariate_cleanup, 0, NULL}, |
15 | 15 |
{"C_multivariate_cleanup", (DL_FUNC) &multivariate_cleanup, 1, arg4}, |
16 | 16 |
{"C_array2D_which_max", (DL_FUNC) &array2D_which_max, 4, arg5}, |