1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,14 @@ |
1 |
+Package: anafinder |
|
2 |
+Type: Package |
|
3 |
+Title: Anaploidy analysis of Seq-data using a Hidden Markov Model |
|
4 |
+Version: 0.1 |
|
5 |
+Date: 2014-05-01 |
|
6 |
+Author: Aaron Taudt, David Porubsky |
|
7 |
+Maintainer: Aaron Taudt <a.s.taudt@umcg.nl>, David Porubsky <d.porubsky@umcg.nl> |
|
8 |
+Description: not yet |
|
9 |
+Depends: R (>= 2.15.2) |
|
10 |
+Imports: Rsamtools, GenomicRanges, IRanges, ggplot2 |
|
11 |
+Suggests: |
|
12 |
+License: GPL |
|
13 |
+LazyLoad: yes |
|
14 |
+Packaged: 2014-02-13 13:29:00 CET+1; Taudt |
0 | 15 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,14 @@ |
1 |
+useDynLib(chromstar) |
|
2 |
+ |
|
3 |
+# Export all names |
|
4 |
+# exportPattern(".") |
|
5 |
+export( |
|
6 |
+bed2bin, |
|
7 |
+bam2bin, |
|
8 |
+bedGraph2bin, |
|
9 |
+) |
|
10 |
+ |
|
11 |
+# Import all packages listed as Imports or Depends |
|
12 |
+import( |
|
13 |
+ Rsamtools, GenomicRanges, IRanges, ggplot2 |
|
14 |
+) |
0 | 15 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,170 @@ |
1 |
+bedGraph2bin <- function(bedGraphfile, chrom.length.file, outputfolder="binned_data", binsizes=200, chromosomes=NULL, separate.chroms=TRUE, save.as.RData=TRUE) { |
|
2 |
+ return(align2bin(bedGraphfile, format="bedGraph", chrom.length.file=chrom.length.file, outputfolder=outputfolder, binsizes=binsizes, chromosomes=chromosomes, separate.chroms=separate.chroms, save.as.RData=save.as.RData)) |
|
3 |
+} |
|
4 |
+ |
|
5 |
+bam2bin <- function(bamfile, bamindex=bamfile, outputfolder="binned_data", binsizes=200, chromosomes=NULL, separate.chroms=TRUE, save.as.RData=TRUE) { |
|
6 |
+ return(align2bin(bamfile, format="bam", index=bamindex, outputfolder=outputfolder, binsizes=binsizes, chromosomes=chromosomes, separate.chroms=separate.chroms, save.as.RData=save.as.RData)) |
|
7 |
+} |
|
8 |
+ |
|
9 |
+bed2bin <- function(bedfile, chrom.length.file, outputfolder="binned_data", binsizes=200, chromosomes=NULL, separate.chroms=TRUE, save.as.RData=TRUE) { |
|
10 |
+ return(align2bin(bedfile, format="bed", chrom.length.file=chrom.length.file, outputfolder=outputfolder, binsizes=binsizes, chromosomes=chromosomes, separate.chroms=separate.chroms, save.as.RData=save.as.RData)) |
|
11 |
+} |
|
12 |
+ |
|
13 |
+align2bin <- function(file, format, index=file, chrom.length.file, outputfolder="binned_data", binsizes=200, chromosomes=NULL, separate.chroms=TRUE, save.as.RData=TRUE) { |
|
14 |
+ |
|
15 |
+ ## Load libraries |
|
16 |
+# library(GenomicRanges) |
|
17 |
+ |
|
18 |
+ ## Create outputfolder if not exists |
|
19 |
+ if (!file.exists(outputfolder) & save.as.RData==TRUE) { |
|
20 |
+ dir.create(outputfolder) |
|
21 |
+ } |
|
22 |
+ |
|
23 |
+ ## Read in the data |
|
24 |
+ if (format == "bed") { |
|
25 |
+ cat("Reading file",basename(file),"...") |
|
26 |
+ # File with chromosome lengths |
|
27 |
+ chrom.lengths.df <- read.table(chrom.length.file) |
|
28 |
+ chrom.lengths <- chrom.lengths.df[,2] |
|
29 |
+ names(chrom.lengths) <- chrom.lengths.df[,1] |
|
30 |
+ # File with reads, determine classes first for faster import |
|
31 |
+ tab5rows <- read.table(file, nrows=5) |
|
32 |
+ classes.in.bed <- sapply(tab5rows, class) |
|
33 |
+ classes <- rep("NULL",length(classes.in.bed)) |
|
34 |
+ classes[1:3] <- classes.in.bed[1:3] |
|
35 |
+ data <- read.table(file, colClasses=classes) |
|
36 |
+ # Convert to GRanges object |
|
37 |
+ data <- GRanges(seqnames=Rle(data[,1]), ranges=IRanges(start=data[,2], end=data[,3]), strand=Rle(strand("*"), nrow(data))) |
|
38 |
+ seqlengths(data) <- as.integer(chrom.lengths[names(seqlengths(data))]) |
|
39 |
+ chroms.in.data <- seqlevels(data) |
|
40 |
+ } else if (format == "bam") { |
|
41 |
+ library(Rsamtools) |
|
42 |
+ cat("Reading header of",basename(file),"...") |
|
43 |
+ file.header <- scanBamHeader(file)[[1]] |
|
44 |
+ chrom.lengths <- file.header$targets |
|
45 |
+ chroms.in.data <- names(chrom.lengths) |
|
46 |
+ } else if (format == "bedGraph") { |
|
47 |
+ cat("Reading file",basename(file),"...") |
|
48 |
+ # File with chromosome lengths |
|
49 |
+ chrom.lengths.df <- read.table(chrom.length.file) |
|
50 |
+ chrom.lengths <- chrom.lengths.df[,2] |
|
51 |
+ names(chrom.lengths) <- chrom.lengths.df[,1] |
|
52 |
+ # File with reads, determine classes first for faster import |
|
53 |
+ tab5rows <- read.table(file, nrows=5) |
|
54 |
+ classes.in.bed <- sapply(tab5rows, class) |
|
55 |
+ classes <- rep("NULL",length(classes.in.bed)) |
|
56 |
+ classes[1:4] <- classes.in.bed[1:4] |
|
57 |
+ data <- read.table(file, colClasses=classes) |
|
58 |
+ # Convert to GRanges object |
|
59 |
+ data <- GRanges(seqnames=Rle(data[,1]), ranges=IRanges(start=data[,2], end=data[,3]), strand=Rle(strand("*"), nrow(data)), signal=data[,4]) |
|
60 |
+ seqlengths(data) <- as.integer(chrom.lengths[names(seqlengths(data))]) |
|
61 |
+ chroms.in.data <- seqlevels(data) |
|
62 |
+ } |
|
63 |
+ cat(" done\n") |
|
64 |
+ |
|
65 |
+ |
|
66 |
+ ### Do the loop for all binsizes |
|
67 |
+ for (binsize in binsizes) { |
|
68 |
+ cat("Binning into binsize",binsize,"\n") |
|
69 |
+ |
|
70 |
+ ### Iterate over all chromosomes |
|
71 |
+ binned.data.allchroms <- NULL |
|
72 |
+ if (is.null(chromosomes)) { |
|
73 |
+ chromosomes <- chroms.in.data |
|
74 |
+ } |
|
75 |
+ for (chromosome in chromosomes) { |
|
76 |
+ ## Check if chromosome exists in data |
|
77 |
+ if ( !(chromosome %in% chroms.in.data) ) { |
|
78 |
+ warning("Skipped chromosome ",chromosome,", not in the data!") |
|
79 |
+ next |
|
80 |
+ } else if ( !(chromosome %in% names(chrom.lengths)) ) { |
|
81 |
+ warning("Skipped chromosome ",chromosome,", no length found!") |
|
82 |
+ next |
|
83 |
+ } |
|
84 |
+ cat(chromosome," \n") |
|
85 |
+ ## Check last incomplete bin |
|
86 |
+ incomplete.bin <- chrom.lengths[chromosome] %% binsize > 0 |
|
87 |
+ if (incomplete.bin) { |
|
88 |
+ numbins <- ceiling(chrom.lengths[chromosome]/binsize) |
|
89 |
+ } else { |
|
90 |
+ numbins <- chrom.lengths[chromosome]/binsize |
|
91 |
+ } |
|
92 |
+ ## Initialize vectors |
|
93 |
+ cat("initialize vectors...\r") |
|
94 |
+ chroms <- rep(chromosome,numbins) |
|
95 |
+ reads <- rep(0,numbins) |
|
96 |
+ start <- seq(from=0, by=binsize, length.out=numbins) |
|
97 |
+ end <- seq(from=binsize-1, by=binsize, length.out=numbins) |
|
98 |
+ end[length(end)] <- chrom.lengths[chromosome] |
|
99 |
+ |
|
100 |
+ ## Create binned chromosome as GRanges object |
|
101 |
+ cat("creating GRanges container... \r") |
|
102 |
+ ichrom <- GRanges(seqnames = Rle(chromosome, numbins), |
|
103 |
+ ranges = IRanges(start=start, end=end), |
|
104 |
+ strand = Rle(strand("*"), numbins) |
|
105 |
+ ) |
|
106 |
+ |
|
107 |
+ if (format=="bam") { |
|
108 |
+ cat("reading reads from file... \r") |
|
109 |
+ data <- readGAlignmentsFromBam(file, index=index, param=ScanBamParam(what=c("pos"),which=range(ichrom))) |
|
110 |
+ } |
|
111 |
+ |
|
112 |
+ ## Count overlaps |
|
113 |
+ cat("counting overlaps... \r") |
|
114 |
+ if (format=="bam" | format=="bed") { |
|
115 |
+ reads <- countOverlaps(ichrom, data[seqnames(data)==chromosome]) |
|
116 |
+ } else if (format=="bedGraph") { |
|
117 |
+ # Take the max value from all regions that fall into / overlap a given bin as read count |
|
118 |
+ midx <- as.matrix(findOverlaps(ichrom, data[seqnames(data)==chromosome])) |
|
119 |
+ reads <- rep(0,length(ichrom)) |
|
120 |
+ signal <- mcols(data)$signal |
|
121 |
+ rle <- rle(midx[,1]) |
|
122 |
+ read.idx <- rle$values |
|
123 |
+ max.idx <- cumsum(rle$lengths) |
|
124 |
+ maxvalues <- rep(NA, length(read.idx)) |
|
125 |
+ maxvalues[1] <- max(signal[midx[1:(max.idx[1]),2]]) |
|
126 |
+ for (i1 in 2:length(read.idx)) { |
|
127 |
+ maxvalues[i1] <- max(signal[midx[(max.idx[i1-1]+1):(max.idx[i1]),2]]) |
|
128 |
+ } |
|
129 |
+ reads[read.idx] <- maxvalues |
|
130 |
+ } |
|
131 |
+ |
|
132 |
+ ## Concatenate |
|
133 |
+ cat("concatenate... \r") |
|
134 |
+ binned.data <- data.frame(chroms,start,end,reads) |
|
135 |
+ names(binned.data) <- c("chrom","start","end","reads") |
|
136 |
+ |
|
137 |
+ if (separate.chroms==TRUE) { |
|
138 |
+ if (save.as.RData==TRUE) { |
|
139 |
+ ## Print to file |
|
140 |
+ filename <- paste(basename(file),"_binsize_",binsize,"_",chromosome,".RData", sep="") |
|
141 |
+ cat("save... \r") |
|
142 |
+ save(binned.data, file=file.path(outputfolder,filename) ) |
|
143 |
+ } else { |
|
144 |
+ cat(" \r") |
|
145 |
+ return(binned.data) |
|
146 |
+ } |
|
147 |
+ } else { |
|
148 |
+ binned.data.allchroms[[length(binned.data.allchroms)+1]] <- binned.data |
|
149 |
+ } |
|
150 |
+ cat(" \r") |
|
151 |
+ |
|
152 |
+ } |
|
153 |
+ if (separate.chroms!=TRUE) { |
|
154 |
+ cat("Concatenating chromosomes ...") |
|
155 |
+ binned.data.allchroms <- do.call("rbind",binned.data.allchroms) |
|
156 |
+ cat(" done\n") |
|
157 |
+ if (save.as.RData==TRUE) { |
|
158 |
+ # Print to file |
|
159 |
+ filename <- paste(basename(file),"_binsize_",binsize,".RData", sep="") |
|
160 |
+ cat("Saving to file ...") |
|
161 |
+ save(binned.data.allchroms, file=file.path(outputfolder,filename) ) |
|
162 |
+ cat(" done\n") |
|
163 |
+ } else { |
|
164 |
+ return(binned.data.allchroms) |
|
165 |
+ } |
|
166 |
+ } |
|
167 |
+ |
|
168 |
+ } |
|
169 |
+ |
|
170 |
+} |
0 | 171 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,67 @@ |
1 |
+check.positive.integer = function(testvar) { |
|
2 |
+ if (!is(testvar,"numeric") & !is(testvar,"integer")) return(1) |
|
3 |
+ if (length(testvar)>1) return(2) |
|
4 |
+ if (testvar <= 0) return(3) |
|
5 |
+ if (testvar != as.integer(testvar)) return(4) |
|
6 |
+ return(0) |
|
7 |
+} |
|
8 |
+check.positive.integer.vector = function(testvec) { |
|
9 |
+ if (!is(testvec,"numeric") & !is(testvec,"integer")) return(1) |
|
10 |
+ for (elem in testvec) { |
|
11 |
+ if (elem <= 0) return(2) |
|
12 |
+ if (elem != as.integer(elem)) return(3) |
|
13 |
+ } |
|
14 |
+ return(0) |
|
15 |
+} |
|
16 |
+check.nonnegative.integer.vector = function(testvec) { |
|
17 |
+ if (!is(testvec,"numeric") & !is(testvec,"integer")) return(1) |
|
18 |
+ for (elem in testvec) { |
|
19 |
+ if (elem < 0) return(2) |
|
20 |
+ if (elem != as.integer(elem)) return(3) |
|
21 |
+ } |
|
22 |
+ return(0) |
|
23 |
+} |
|
24 |
+check.positive = function(testvar) { |
|
25 |
+ if (!is(testvar,"numeric") & !is(testvar,"integer")) return(1) |
|
26 |
+ if (length(testvar)>1) return(2) |
|
27 |
+ if (testvar <= 0) return(3) |
|
28 |
+ return(0) |
|
29 |
+} |
|
30 |
+check.positive.vector = function(testvec) { |
|
31 |
+ if (!is(testvec,"numeric") & !is(testvec,"integer")) return(1) |
|
32 |
+ for (elem in testvec) { |
|
33 |
+ if (elem <= 0) return(2) |
|
34 |
+ } |
|
35 |
+ return(0) |
|
36 |
+} |
|
37 |
+check.nonnegative.vector = function(testvec) { |
|
38 |
+ if (!is(testvec,"numeric") & !is(testvec,"integer")) return(1) |
|
39 |
+ for (elem in testvec) { |
|
40 |
+ if (elem < 0) return(2) |
|
41 |
+ } |
|
42 |
+ return(0) |
|
43 |
+} |
|
44 |
+check.integer = function(testvar) { |
|
45 |
+ if (!is(testvar,"numeric") & !is(testvar,"integer")) return(1) |
|
46 |
+ if (length(testvar)>1) return(2) |
|
47 |
+ if (testvar != as.integer(testvar)) return(3) |
|
48 |
+ return(0) |
|
49 |
+} |
|
50 |
+ |
|
51 |
+check.univariate.modellist = function(modellist) { |
|
52 |
+ if (!is(modellist,"list")) return(1) |
|
53 |
+ for (model in modellist) { |
|
54 |
+ if (!is(model,"chromStar.univariate.model")) return(2) |
|
55 |
+ } |
|
56 |
+ return(0) |
|
57 |
+} |
|
58 |
+check.univariate.model = function(model) { |
|
59 |
+ if (!is(model,"chromStar.univariate.model")) return(1) |
|
60 |
+ return(0) |
|
61 |
+} |
|
62 |
+ |
|
63 |
+check.logical = function(testbool) { |
|
64 |
+ if (!is(testbool,"logical")) return(1) |
|
65 |
+ if (length(testbool)>1) return(2) |
|
66 |
+ return(0) |
|
67 |
+} |
0 | 68 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,90 @@ |
1 |
+collapse.bins = function(data, column2collapseBy=NULL, columns2sumUp=NULL) { |
|
2 |
+ |
|
3 |
+ # Indexing stuff |
|
4 |
+ ind_coords = 1:3 |
|
5 |
+ ind_morecols = setdiff(1:ncol(data), c(ind_coords,columns2sumUp)) |
|
6 |
+ ind_sumcols = columns2sumUp |
|
7 |
+ |
|
8 |
+ # Make the comparison vector |
|
9 |
+ if (is.null(column2collapseBy)) { |
|
10 |
+ c = data$start |
|
11 |
+ cShift1 = rep(NA,length(c)) |
|
12 |
+ cShift1[2:length(cShift1)] = data$end[-length(c)] + 1 |
|
13 |
+ } else { |
|
14 |
+ if (is(data[,column2collapseBy], "factor")) { |
|
15 |
+ c <- as.integer(data[,column2collapseBy]) |
|
16 |
+ } else { |
|
17 |
+ c = data[,column2collapseBy] |
|
18 |
+ } |
|
19 |
+ cShift1 = rep(NA,length(c)) |
|
20 |
+ cShift1[-1] = c[-length(c)] |
|
21 |
+ } |
|
22 |
+ compare = c != cShift1 |
|
23 |
+ compare[1] = TRUE |
|
24 |
+ numcollapsedbins = length(which(compare==TRUE)) |
|
25 |
+ numbins = nrow(data) |
|
26 |
+ |
|
27 |
+ # Select the collapsed rows |
|
28 |
+ collapsed.bins = NULL |
|
29 |
+ collapsed.bins$chrom = data[compare,1] |
|
30 |
+ collapsed.bins$start = data[compare,2] |
|
31 |
+ collapsed.bins$end = data[c((which(compare==TRUE)-1)[-1],numbins), 3] |
|
32 |
+ if (length(ind_morecols)==1) { |
|
33 |
+ collapsed.bins[[4]] = data[compare, ind_morecols] |
|
34 |
+ } else if (length(ind_morecols)>1) { |
|
35 |
+ lcb = length(collapsed.bins) |
|
36 |
+ lmc = length(ind_morecols) |
|
37 |
+ collapsed.bins[(lcb+1):(lcb+lmc)] = data[compare, ind_morecols] |
|
38 |
+ } |
|
39 |
+ |
|
40 |
+ # Sum up columns |
|
41 |
+ if (!is.null(columns2sumUp)) { |
|
42 |
+ sumcols = as.matrix(data[,columns2sumUp]) |
|
43 |
+ collapsed_sumcols = matrix(rep(NA,numcollapsedbins*length(columns2sumUp)), ncol=length(columns2sumUp)) |
|
44 |
+ pb = txtProgressBar(min=1, max=length(compare), style=3) |
|
45 |
+ icount = 1 |
|
46 |
+ i1_lasttrue = 1 |
|
47 |
+ for (i1 in 1:length(compare)) { |
|
48 |
+ if (compare[i1]==TRUE) { |
|
49 |
+ if (length(columns2sumUp)==1) { |
|
50 |
+ collapsed_sumcols[icount-1] = sum(sumcols[i1_lasttrue:(i1-1),]) |
|
51 |
+ } else { |
|
52 |
+ if (i1_lasttrue==i1-1 | i1==1) { |
|
53 |
+ collapsed_sumcols[icount-1,] = as.numeric(sumcols[i1_lasttrue,]) |
|
54 |
+ } else { |
|
55 |
+ collapsed_sumcols[icount-1,] = apply(sumcols[i1_lasttrue:(i1-1),],2,sum) |
|
56 |
+ } |
|
57 |
+ } |
|
58 |
+ |
|
59 |
+ icount = icount+1 |
|
60 |
+ i1_lasttrue = i1 |
|
61 |
+ setTxtProgressBar(pb, i1) |
|
62 |
+ } |
|
63 |
+ } |
|
64 |
+ i1 = i1+1 |
|
65 |
+ if (length(columns2sumUp)==1) { |
|
66 |
+ collapsed_sumcols[icount-1] = sum(sumcols[i1_lasttrue:(i1-1),]) |
|
67 |
+ } else { |
|
68 |
+ if (i1_lasttrue==i1-1 | i1==1) { |
|
69 |
+ collapsed_sumcols[icount-1,] = as.numeric(sumcols[i1_lasttrue,]) |
|
70 |
+ } else { |
|
71 |
+ collapsed_sumcols[icount-1,] = apply(sumcols[i1_lasttrue:(i1-1),],2,sum) |
|
72 |
+ } |
|
73 |
+ } |
|
74 |
+ setTxtProgressBar(pb, length(compare)) |
|
75 |
+ close(pb) |
|
76 |
+ |
|
77 |
+ lcb = length(collapsed.bins) |
|
78 |
+ lsc = length(ind_sumcols) |
|
79 |
+ collapsed.bins[(lcb+1):(lcb+lsc)] = as.data.frame(collapsed_sumcols) |
|
80 |
+ } |
|
81 |
+ |
|
82 |
+ # Create the return data.frame with same order as input data.frame |
|
83 |
+ collapsed.bins.reordered = NULL |
|
84 |
+ collapsed.bins.reordered[c(ind_coords,ind_morecols,ind_sumcols)] = collapsed.bins |
|
85 |
+ names(collapsed.bins.reordered) = names(data) |
|
86 |
+ |
|
87 |
+ return(as.data.frame(collapsed.bins.reordered)) |
|
88 |
+ |
|
89 |
+} |
|
90 |
+ |
0 | 91 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,291 @@ |
1 |
+# ---------------------------------------------------------------- |
|
2 |
+# Write univariate states to file |
|
3 |
+# ---------------------------------------------------------------- |
|
4 |
+univariate2bed <- function(modellist, file="view_me_in_genome_browser", threshold=0.5, separate.zeroinflation=FALSE, chrom.length.file=NULL) { |
|
5 |
+ |
|
6 |
+ # Check user input |
|
7 |
+ if (is.null(names(modellist))) { |
|
8 |
+ stop("Please name the list entries. The names will be used as track name for the Genome Browser file.") |
|
9 |
+ } |
|
10 |
+ |
|
11 |
+ # Variables |
|
12 |
+ nummod <- length(modellist) |
|
13 |
+ file <- paste(file,"bed", sep=".") |
|
14 |
+ |
|
15 |
+ # Generate the colors |
|
16 |
+ colors <- c("zeroinflation"="black", "unmodified"="gray48","modified"="orangered3") |
|
17 |
+ RGBs <- t(col2rgb(colors)) |
|
18 |
+ RGBs <- apply(RGBs,1,paste,collapse=",") |
|
19 |
+ |
|
20 |
+ # Write first line to file |
|
21 |
+ cat("browser hide all\n", file=file) |
|
22 |
+ |
|
23 |
+ for (imod in 1:nummod) { |
|
24 |
+ model <- modellist[[imod]] |
|
25 |
+ priority <- 50 + imod |
|
26 |
+ cat(paste("track name=",names(modellist)[imod]," description=\"univariate calls for ",names(modellist)[imod],", threshold = ",threshold,"\" visibility=1 itemRgb=On priority=",priority,"\n", sep=""), file=file, append=TRUE) |
|
27 |
+ |
|
28 |
+ # Collapse the calls |
|
29 |
+ if (separate.zeroinflation) { |
|
30 |
+ calls <- cbind(model$coordinates, name=model$states.with.zeroinflation) |
|
31 |
+ collapsed.calls <- collapse.bins(calls, column2collapseBy=4) |
|
32 |
+ } else { |
|
33 |
+ calls <- cbind(model$coordinates, name=model$states) |
|
34 |
+ collapsed.calls <- collapse.bins(calls, column2collapseBy=4) |
|
35 |
+ collapsed.calls <- collapsed.calls[collapsed.calls$name=="modified",] |
|
36 |
+ } |
|
37 |
+ |
|
38 |
+ # Check length of chromosomes if chrom.length.file was given |
|
39 |
+ if (!is.null(chrom.length.file)) { |
|
40 |
+ chrom.lengths.df <- read.table(chrom.length.file) |
|
41 |
+ chrom.lengths <- chrom.lengths.df[,2] |
|
42 |
+ names(chrom.lengths) <- chrom.lengths.df[,1] |
|
43 |
+ for (chrom in levels(as.factor(collapsed_calls$chrom))) { |
|
44 |
+ index <- which(collapsed_calls$chrom == chrom & collapsed_calls$end > chrom.lengths[chrom]) |
|
45 |
+ collapsed_calls$end[index] <- chrom.lengths[chrom] |
|
46 |
+ if (length(index) >= 1) { |
|
47 |
+ warning("Adjusted entry in chromosome ",chrom,", because it was longer than the length of the chromosome") |
|
48 |
+ } |
|
49 |
+ } |
|
50 |
+ } |
|
51 |
+ |
|
52 |
+ # Append to file |
|
53 |
+ itemRgb <- RGBs[as.character(collapsed.calls$name)] |
|
54 |
+ numsegments <- nrow(collapsed.calls) |
|
55 |
+ df <- cbind(collapsed.calls, score=rep(0,numsegments), strand=rep(".",numsegments), thickStart=collapsed.calls$start, thickEnd=collapsed.calls$end, itemRgb=itemRgb) |
|
56 |
+ write.table(format(df, scientific=FALSE), file=file, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE) |
|
57 |
+ } |
|
58 |
+ |
|
59 |
+} |
|
60 |
+ |
|
61 |
+# ---------------------------------------------------------------- |
|
62 |
+# |
|
63 |
+# ---------------------------------------------------------------- |
|
64 |
+make.ucsc.file.combinatorial <- function(modellist, numstates=NULL, file="combinatorial-states-ucsc", threshold=0.5, chrom.length.file=NULL) { |
|
65 |
+ |
|
66 |
+ # Define variables |
|
67 |
+ nummod <- length(modellist) |
|
68 |
+ file <- paste(file,".BED", sep="") |
|
69 |
+ coordinates <- modellist[[1]]$coordinates |
|
70 |
+ |
|
71 |
+ # Write first line to file |
|
72 |
+ cat("browser position chr1:1-100000\n", file=file) |
|
73 |
+ |
|
74 |
+ # Get the combinatorial states |
|
75 |
+ cat("Getting the combinatorial states ...\n") |
|
76 |
+ combstates <- combinatorial.states(modellist) |
|
77 |
+ states <- as.numeric(names(sort(table(combstates), decreasing=TRUE))) |
|
78 |
+ |
|
79 |
+ # Clean up to reduce memory usage |
|
80 |
+ remove(modellist) |
|
81 |
+ |
|
82 |
+ if (is.null(numstates)) { |
|
83 |
+ numstates <- length(states) |
|
84 |
+ } else if (numstates > length(states)) { |
|
85 |
+ numstates <- length(states) |
|
86 |
+ } |
|
87 |
+ # Go through each state and write to file |
|
88 |
+ for (istate in states[1:numstates]) { |
|
89 |
+ cat("\nAnalyzing state",istate,"\n") |
|
90 |
+ cat(paste("track name=\"combinatorial state ",istate,"\" description=\"univariate calls for state ",istate,", threshold = ",threshold,"\" visibility=1\n", sep=""), file=file, append=TRUE) |
|
91 |
+ |
|
92 |
+ calls <- coordinates[ combstates == istate , ] |
|
93 |
+ if (length(calls$start) >= 1) { |
|
94 |
+ # Collapse the bins |
|
95 |
+ cat("collapsing the calls\n") |
|
96 |
+ collapsed_calls <- collapse.bins(calls) |
|
97 |
+ } else { |
|
98 |
+ cat("no bins in state",istate,"\n") |
|
99 |
+ collapsed_calls <- NULL |
|
100 |
+ } |
|
101 |
+ |
|
102 |
+ if (!is.null(collapsed_calls)) { |
|
103 |
+ # Check length of chromosomes if chrom.length.file was given |
|
104 |
+ if (!is.null(chrom.length.file)) { |
|
105 |
+ chrom.lengths.df <- read.table(chrom.length.file) |
|
106 |
+ chrom.lengths <- chrom.lengths.df[,2] |
|
107 |
+ names(chrom.lengths) <- chrom.lengths.df[,1] |
|
108 |
+ for (chrom in levels(as.factor(collapsed_calls$chrom))) { |
|
109 |
+ index <- which(collapsed_calls$chrom == chrom & collapsed_calls$end > chrom.lengths[chrom]) |
|
110 |
+ collapsed_calls$end[index] <- chrom.lengths[chrom] |
|
111 |
+ if (length(index) >= 1) { |
|
112 |
+ warning("Adjusted entry in chromosome ",chrom,", because it was longer than the length of the chromosome") |
|
113 |
+ } |
|
114 |
+ } |
|
115 |
+ } |
|
116 |
+ |
|
117 |
+ cat("appending to file ...\n") |
|
118 |
+ write.table(format(as.data.frame(collapsed_calls)[,1:3], scientific=FALSE), file=file, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE) |
|
119 |
+ } |
|
120 |
+ } |
|
121 |
+ |
|
122 |
+} |
|
123 |
+ |
|
124 |
+ |
|
125 |
+# ---------------------------------------------------------------- |
|
126 |
+# |
|
127 |
+# ---------------------------------------------------------------- |
|
128 |
+make.ucsc.file.multivariate <- function(model, numstates=NULL, file="multivariate-states-ucsc", chrom.length.file=NULL) { |
|
129 |
+ |
|
130 |
+ # Variable assignments |
|
131 |
+ states <- model$stateorder |
|
132 |
+ coordinates <- model$coordinates |
|
133 |
+ file <- paste(file,".BED", sep="") |
|
134 |
+ |
|
135 |
+ # Write first line to file |
|
136 |
+ cat("browser position chr1:1-100000\n", file=file) |
|
137 |
+ |
|
138 |
+ # Determine state of bin |
|
139 |
+ cat("Determining state of bins ...\n") |
|
140 |
+ combstates <- apply(model$posteriors,1,which.max) |
|
141 |
+ combstates <- states[combstates] |
|
142 |
+ |
|
143 |
+ if (is.null(numstates)) { |
|
144 |
+ numstates <- length(states) |
|
145 |
+ } else if (numstates > length(states)) { |
|
146 |
+ numstates <- length(states) |
|
147 |
+ } |
|
148 |
+ # Go through each state and write to file |
|
149 |
+ for (istate in states[1:numstates]) { |
|
150 |
+ cat("\nAnalyzing state",istate,"\n") |
|
151 |
+ cat(paste("track name=\"multivariate state ",istate,"\" description=\"multivariate calls for state ",istate,"\" visibility=1 color=0,0,255\n", sep=""), file=file, append=TRUE) |
|
152 |
+ |
|
153 |
+ calls <- coordinates[ combstates == istate , ] |
|
154 |
+ if (length(calls$start) >= 1) { |
|
155 |
+ # Collapse the bins |
|
156 |
+ cat("collapsing the calls\n") |
|
157 |
+ collapsed_calls <- collapse.bins(calls) |
|
158 |
+ } else { |
|
159 |
+ cat("no bins in state",istate,"\n") |
|
160 |
+ collapsed_calls <- NULL |
|
161 |
+ } |
|
162 |
+ |
|
163 |
+ if (!is.null(collapsed_calls)) { |
|
164 |
+ # Check length of chromosomes if chrom.length.file was given |
|
165 |
+ if (!is.null(chrom.length.file)) { |
|
166 |
+ chrom.lengths.df <- read.table(chrom.length.file) |
|
167 |
+ chrom.lengths <- chrom.lengths.df[,2] |
|
168 |
+ names(chrom.lengths) <- chrom.lengths.df[,1] |
|
169 |
+ for (chrom in levels(as.factor(collapsed_calls$chrom))) { |
|
170 |
+ index <- which(collapsed_calls$chrom == chrom & collapsed_calls$end > chrom.lengths[chrom]) |
|
171 |
+ collapsed_calls$end[index] <- chrom.lengths[chrom] |
|
172 |
+ if (length(index) >= 1) { |
|
173 |
+ warning("Adjusted entry in chromosome ",chrom,", because it was longer than the length of the chromosome") |
|
174 |
+ } |
|
175 |
+ } |
|
176 |
+ } |
|
177 |
+ |
|
178 |
+ cat("appending to file ...\n") |
|
179 |
+ write.table(format(as.data.frame(collapsed_calls)[,1:3], scientific=FALSE), file=file, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE) |
|
180 |
+ } |
|
181 |
+ } |
|
182 |
+ |
|
183 |
+} |
|
184 |
+ |
|
185 |
+ |
|
186 |
+# ---------------------------------------------------------------- |
|
187 |
+# Write signal tracks from binned data |
|
188 |
+# ---------------------------------------------------------------- |
|
189 |
+bin2wiggle <- function(binned.data.list, file="view_me_in_genome_browser") { |
|
190 |
+ |
|
191 |
+ # Check user input |
|
192 |
+ if (is.null(names(binned.data.list))) { |
|
193 |
+ stop("Please name the list entries. The names will be used as track name for the Genome Browser file.") |
|
194 |
+ } |
|
195 |
+ |
|
196 |
+ # Variables |
|
197 |
+ file <- paste(file,"wig", sep=".") |
|
198 |
+ |
|
199 |
+ # Write to file |
|
200 |
+ cat("browser hide all\n", file=file) |
|
201 |
+ |
|
202 |
+ # Go through binned.data.list |
|
203 |
+ for (i1 in 1:length(binned.data.list)) { |
|
204 |
+ binned.data <- binned.data.list[[i1]] |
|
205 |
+ names(binned.data) <- c("chrom","start","end","reads") |
|
206 |
+ |
|
207 |
+ # Write track information |
|
208 |
+ name <- names(binned.data.list)[i1] |
|
209 |
+ binsize <- diff(binned.data$start[1:2]) |
|
210 |
+ description <- paste("binned read counts ",binsize,"bp", sep="") |
|
211 |
+ cat(paste("track type=wiggle_0 name=\"",name,"\" description=\"",description,"\" visibility=full autoScale=on color=90,90,90 maxHeightPixels=100:50:20 graphType=bar priority=",i1,"\n", sep=""), file=file, append=TRUE) |
|
212 |
+ # Write read data |
|
213 |
+ for (chrom in unique(binned.data$chrom)) { |
|
214 |
+ cat(paste("fixedStep chrom=",chrom," start=1 step=",binsize," span=",binsize,"\n", sep=""), file=file, append=TRUE) |
|
215 |
+ write.table(binned.data$reads, file=file, append=TRUE, row.names=FALSE, col.names=FALSE) |
|
216 |
+ } |
|
217 |
+ } |
|
218 |
+ |
|
219 |
+} |
|
220 |
+ |
|
221 |
+ |
|
222 |
+# ---------------------------------------------------------------- |
|
223 |
+# Write color-coded (multivariate) combinatorial states to file |
|
224 |
+# ---------------------------------------------------------------- |
|
225 |
+multivariate2bed <- function(model, separate.tracks=FALSE, exclude.state.zero=TRUE, numstates=NULL, file="view_me_in_genome_browser", chrom.length.file=NULL) { |
|
226 |
+ |
|
227 |
+ # Variables |
|
228 |
+ file <- paste(file,".bed", sep="") |
|
229 |
+ combstates <- model$stateorder |
|
230 |
+ if (is.null(numstates)) { |
|
231 |
+ numstates <- length(combstates) |
|
232 |
+ } else if (numstates > length(combstates)) { |
|
233 |
+ numstates <- length(combstates) |
|
234 |
+ } |
|
235 |
+ if (exclude.state.zero) { |
|
236 |
+ combstates2use <- setdiff(combstates,0) |
|
237 |
+ combstates2use <- combstates2use[1:numstates] |
|
238 |
+ combstates2use <- combstates2use[!is.na(combstates2use)] |
|
239 |
+ numstates <- length(combstates2use) |
|
240 |
+ } |
|
241 |
+ |
|
242 |
+ # Collapse the calls |
|
243 |
+ calls <- cbind(model$coordinates, name=model$states) |
|
244 |
+ collapsed.calls <- collapse.bins(calls, column2collapseBy=4) |
|
245 |
+ # Select only desired states |
|
246 |
+ mask <- rep(FALSE,nrow(collapsed.calls)) |
|
247 |
+ for (istate in combstates2use) { |
|
248 |
+ mask <- mask | istate==collapsed.calls$name |
|
249 |
+ } |
|
250 |
+ collapsed.calls <- collapsed.calls[mask,] |
|
251 |
+ |
|
252 |
+ # Check length of chromosomes if chrom.length.file was given |
|
253 |
+ if (!is.null(chrom.length.file)) { |
|
254 |
+ chrom.lengths.df <- read.table(chrom.length.file) |
|
255 |
+ chrom.lengths <- chrom.lengths.df[,2] |
|
256 |
+ names(chrom.lengths) <- chrom.lengths.df[,1] |
|
257 |
+ for (chrom in levels(as.factor(collapsed.calls$chrom))) { |
|
258 |
+ index <- which(collapsed.calls$chrom == chrom & collapsed.calls$end > chrom.lengths[chrom]) |
|
259 |
+ collapsed.calls$end[index] <- chrom.lengths[chrom] |
|
260 |
+ if (length(index) >= 1) { |
|
261 |
+ warning("Adjusted entry in chromosome ",chrom,", because it was longer than the length of the chromosome") |
|
262 |
+ } |
|
263 |
+ } |
|
264 |
+ } |
|
265 |
+ |
|
266 |
+ # Generate the colors for each combinatorial state |
|
267 |
+ colors <- colors()[grep(colors(), pattern="white|grey|gray|snow", invert=T)] |
|
268 |
+ step <- length(colors) %/% numstates |
|
269 |
+ colors <- colors[seq(1,by=step,length=numstates)] |
|
270 |
+ RGBs <- t(col2rgb(colors)) |
|
271 |
+ RGBs <- apply(RGBs,1,paste,collapse=",") |
|
272 |
+ itemRgb <- RGBs[as.factor(collapsed.calls$name)] |
|
273 |
+ |
|
274 |
+ # Write to file |
|
275 |
+ cat("browser hide all\n", file=file) |
|
276 |
+ numsegments <- nrow(collapsed.calls) |
|
277 |
+ df <- cbind(collapsed.calls, score=rep(0,numsegments), strand=rep(".",numsegments), thickStart=collapsed.calls$start, thickEnd=collapsed.calls$end, itemRgb=itemRgb) |
|
278 |
+ |
|
279 |
+ if (separate.tracks) { |
|
280 |
+ for (istate in combstates2use) { |
|
281 |
+ priority <- 100 + which(istate==combstates2use) |
|
282 |
+ cat(paste("track name=\"comb.state ",istate,"\" description=\"multivariate calls for combinatorial state ",istate,"\" visibility=1 itemRgb=On priority=",priority,"\n", sep=""), file=file, append=TRUE) |
|
283 |
+ write.table(format(df[df$name==istate,], scientific=FALSE), file=file, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE) |
|
284 |
+ } |
|
285 |
+ } else { |
|
286 |
+ cat(paste("track name=\"comb.state\" description=\"multivariate combinatorial states\" visibility=1 itemRgb=On priority=100\n", sep=""), file=file, append=TRUE) |
|
287 |
+ write.table(format(df, scientific=FALSE), file=file, append=TRUE, row.names=FALSE, col.names=FALSE, quote=FALSE) |
|
288 |
+ } |
|
289 |
+ |
|
290 |
+} |
|
291 |
+ |
0 | 292 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,23 @@ |
1 |
+get.states <- function(model, threshold=0.5, separate.zeroinflation=FALSE) { |
|
2 |
+ numbins <- nrow(model$posteriors) |
|
3 |
+ states <- rep(NA,numbins) |
|
4 |
+ if (is(model,"chromStar.univariate.model")) { |
|
5 |
+ if (separate.zeroinflation) { |
|
6 |
+ states[ model$posteriors[,3]<=threshold & model$posteriors[,2]<=model$posteriors[,1] ] <- 1 |
|
7 |
+ states[ model$posteriors[,3]<=threshold & model$posteriors[,2]>=model$posteriors[,1] ] <- 2 |
|
8 |
+ states[ model$posteriors[,3]>threshold ] <- 3 |
|
9 |
+ states <- as.factor(c("zeroinflation","unmodified","modified"))[states] |
|
10 |
+ } else { |
|
11 |
+ states <- ifelse(model$posteriors[,3]>threshold, 2, 1) |
|
12 |
+ states <- as.factor(c("unmodified","modified"))[states] |
|
13 |
+ } |
|
14 |
+ return(states) |
|
15 |
+ } else if (is(model,"chromStar.multivariate.model")) { |
|
16 |
+ states <- model$stateorder[apply(model$posteriors, 1, which.max)] |
|
17 |
+ return(states) |
|
18 |
+ } else { |
|
19 |
+ stop("Supply either a univariate or multivariate chromStar model") |
|
20 |
+ } |
|
21 |
+ |
|
22 |
+} |
|
23 |
+ |
0 | 24 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,52 @@ |
1 |
+mixedorder = function (x) |
|
2 |
+{ |
|
3 |
+ if (length(x) < 1) |
|
4 |
+ return(NULL) |
|
5 |
+ else if (length(x) == 1) |
|
6 |
+ return(1) |
|
7 |
+ if (is.numeric(x)) |
|
8 |
+ return(order(x)) |
|
9 |
+ delim = "\\$\\@\\$" |
|
10 |
+ numeric <- function(x) { |
|
11 |
+ suppressWarnings(as.numeric(x)) |
|
12 |
+ } |
|
13 |
+ nonnumeric <- function(x) { |
|
14 |
+ suppressWarnings(ifelse(is.na(as.numeric(x)), toupper(x), |
|
15 |
+ NA)) |
|
16 |
+ } |
|
17 |
+ x <- as.character(x) |
|
18 |
+ which.nas <- which(is.na(x)) |
|
19 |
+ which.blanks <- which(x == "") |
|
20 |
+ if (length(which.blanks) > 0) |
|
21 |
+ x[which.blanks] <- -Inf |
|
22 |
+ if (length(which.nas) > 0) |
|
23 |
+ x[which.nas] <- Inf |
|
24 |
+ delimited <- gsub("([+-]{0,1}[0-9]+\\.{0,1}[0-9]*([eE][\\+\\-]{0,1}[0-9]+\\.{0,1}[0-9]*){0,1})", |
|
25 |
+ paste(delim, "\\1", delim, sep = ""), x) |
|
26 |
+ step1 <- strsplit(delimited, delim) |
|
27 |
+ step1 <- lapply(step1, function(x) x[x > ""]) |
|
28 |
+ step1.numeric <- lapply(step1, numeric) |
|
29 |
+ step1.character <- lapply(step1, nonnumeric) |
|
30 |
+ maxelem <- max(sapply(step1, length)) |
|
31 |
+ step1.numeric.t <- lapply(1:maxelem, function(i) sapply(step1.numeric, |
|
32 |
+ function(x) x[i])) |
|
33 |
+ step1.character.t <- lapply(1:maxelem, function(i) sapply(step1.character, |
|
34 |
+ function(x) x[i])) |
|
35 |
+ rank.numeric <- sapply(step1.numeric.t, rank) |
|
36 |
+ rank.character <- sapply(step1.character.t, function(x) as.numeric(factor(x))) |
|
37 |
+ rank.numeric[!is.na(rank.character)] <- 0 |
|
38 |
+ rank.character <- t(t(rank.character) + apply(matrix(rank.numeric), |
|
39 |
+ 2, max, na.rm = TRUE)) |
|
40 |
+ rank.overall <- ifelse(is.na(rank.character), rank.numeric, |
|
41 |
+ rank.character) |
|
42 |
+ order.frame <- as.data.frame(rank.overall) |
|
43 |
+ if (length(which.nas) > 0) |
|
44 |
+ order.frame[which.nas, ] <- Inf |
|
45 |
+ retval <- do.call("order", order.frame) |
|
46 |
+ return(retval) |
|
47 |
+} |
|
48 |
+ |
|
49 |
+mixedsort = function (x) |
|
50 |
+{ |
|
51 |
+ x[mixedorder(x)] |
|
52 |
+} |
0 | 53 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,146 @@ |
1 |
+# ------------------------------------------------------------ |
|
2 |
+# Plot a read histogram with univariate fits |
|
3 |
+# ------------------------------------------------------------ |
|
4 |
+plot.distribution <- function(model, state=NULL, chr=NULL, start=NULL, end=NULL) { |
|
5 |
+ |
|
6 |
+ ## Load libraries |
|
7 |
+# library(ggplot2) |
|
8 |
+ |
|
9 |
+ # ----------------------------------------- |
|
10 |
+ # Get right x limit |
|
11 |
+ get_rightxlim <- function(histdata, reads) { |
|
12 |
+ rightxlim1 <- median(reads[reads>0])*7 |
|
13 |
+ breaks <- histdata$breaks[1:length(histdata$counts)] |
|
14 |
+ counts <- histdata$counts |
|
15 |
+ rightxlim2 <- breaks[counts<=5 & breaks>median(reads)][1] |
|
16 |
+ rightxlim <- min(c(rightxlim1,rightxlim2), na.rm=TRUE) |
|
17 |
+ return(rightxlim) |
|
18 |
+ } |
|
19 |
+ |
|
20 |
+ ## Plot settings |
|
21 |
+ cols <- c("unmodified"="gray48","modified"="orangered3", "total"="black") |
|
22 |
+ |
|
23 |
+ # Select the rows to plot |
|
24 |
+ selectmask <- rep(TRUE,length(model$reads)) |
|
25 |
+ numchrom <- length(table(model$coordinates$chrom)) |
|
26 |
+ if (!is.null(chr)) { |
|
27 |
+ if (! chr %in% levels(model$coordinates$chrom)) { |
|
28 |
+ stop(chr," can't be found in the model coordinates.") |
|
29 |
+ } |
|
30 |
+ selectchrom <- model$coordinates$chrom == chr |
|
31 |
+ selectmask <- selectmask & selectchrom |
|
32 |
+ numchrom <- 1 |
|
33 |
+ } |
|
34 |
+ if (numchrom == 1) { |
|
35 |
+ if (!is.null(start)) { |
|
36 |
+ selectstart <- model$coordinates$start >= start |
|
37 |
+ selectmask <- selectmask & selectstart |
|
38 |
+ } |
|
39 |
+ if (!is.null(end)) { |
|
40 |
+ selectend <- model$coordinates$end <= end |
|
41 |
+ selectmask <- selectmask & selectend |
|
42 |
+ } |
|
43 |
+ } |
|
44 |
+ if (!is.null(state)) { |
|
45 |
+ states <- get.states(model) |
|
46 |
+ selectmask <- selectmask & states==state |
|
47 |
+ } |
|
48 |
+ if (length(which(selectmask)) != length(model$reads)) { |
|
49 |
+ reads <- model$reads[selectmask] |
|
50 |
+ posteriors <- model$posteriors[selectmask,] |
|
51 |
+ softweights <- apply(posteriors,2,mean) |
|
52 |
+ } else { |
|
53 |
+ reads <- model$reads |
|
54 |
+ softweights <- model$softweights |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ # Find the x limits |
|
58 |
+ breaks <- max(reads) |
|
59 |
+ if (max(reads)==0) { breaks <- 1 } |
|
60 |
+ histdata <- hist(reads, right=FALSE, breaks=breaks, plot=FALSE) |
|
61 |
+ rightxlim <- get_rightxlim(histdata, reads) |
|
62 |
+ |
|
63 |
+ # Plot the histogram |
|
64 |
+ ggplt <- ggplot(data.frame(reads)) + geom_histogram(aes(x=reads, y=..density..), binwidth=1, color='black', fill='white') + xlim(0,rightxlim) + theme_bw() + xlab("read count") |
|
65 |
+ |
|
66 |
+ ### Add fits to the histogram |
|
67 |
+ numstates <- length(softweights) |
|
68 |
+ x <- 0:rightxlim |
|
69 |
+ distributions <- data.frame(x) |
|
70 |
+ |
|
71 |
+ # Unmodified |
|
72 |
+ distributions$unmodified <- (1-softweights[3]) * dzinbinom(x, softweights[1], model$distributions[2,'r'], model$distributions[2,'p']) |
|
73 |
+ # Modified |
|
74 |
+ distributions$modified <- softweights[3] * dnbinom(x, model$distributions[3,'r'], model$distributions[3,'p']) |
|
75 |
+ # Total |
|
76 |
+ distributions$total <- distributions$unmodified + distributions$modified |
|
77 |
+ |
|
78 |
+ ### Plot the distributions |
|
79 |
+ if (is.null(state)) { |
|
80 |
+ ggplt <- ggplt + geom_line(aes(x=x, y=unmodified, color="unmodified", group=1), data=distributions, size=1) |
|
81 |
+ ggplt <- ggplt + geom_line(aes(x=x, y=modified, color="modified", group=1), data=distributions, size=1) |
|
82 |
+ ggplt <- ggplt + geom_line(aes(x=x, y=total, color="total", group=1), data=distributions, size=1) |
|
83 |
+ } else { |
|
84 |
+ if (state==0) ggplt <- ggplt + geom_line(aes(x=x, y=unmodified, color="unmodified", group=1), data=distributions, size=1) |
|
85 |
+ if (state==1) ggplt <- ggplt + geom_line(aes(x=x, y=modified, color="modified", group=1), data=distributions, size=1) |
|
86 |
+ } |
|
87 |
+ |
|
88 |
+ # Make legend and colors correct |
|
89 |
+ ggplt <- ggplt + scale_color_manual(name="components", values=cols) |
|
90 |
+ |
|
91 |
+ return(ggplt) |
|
92 |
+ |
|
93 |
+} |
|
94 |
+ |
|
95 |
+# ------------------------------------------------------------ |
|
96 |
+# Plot a read histogram in normal space of the given state |
|
97 |
+# ------------------------------------------------------------ |
|
98 |
+plot.distribution.normal <- function(model, state=0) { |
|
99 |
+ |
|
100 |
+ ## Load libraries |
|
101 |
+# library(ggplot2) |
|
102 |
+ |
|
103 |
+ ## Intercept user input |
|
104 |
+ if (state!=0 & state!=1) { stop("state has to be either 0 or 1") } |
|
105 |
+ |
|
106 |
+ ## Plot settings |
|
107 |
+ cols <- c("unmodified"="gray48","modified"="orangered3") |
|
108 |
+ |
|
109 |
+ ## Transform the reads |
|
110 |
+ states <- get.states(model) |
|
111 |
+ df <- data.frame(bin=1:length(model$reads), reads=model$reads, state=as.factor(states)) |
|
112 |
+ # Transform to uniform space |
|
113 |
+ df$ureads[df$state==0] <- pzinbinom(df$reads[df$state==0], model$softweights[1], model$distributions[2,'r'], model$distributions[2,'p']) |
|
114 |
+ df$ureads[df$state==1] <- pnbinom(df$reads[df$state==1], model$distributions[3,'r'], model$distributions[3,'p']) |
|
115 |
+ # Transform to normal space |
|
116 |
+ df$nreads <- qnorm(df$ureads) |
|
117 |
+ |
|
118 |
+ ## Make the plots |
|
119 |
+ subset <- df$nreads[df$state==state] |
|
120 |
+ breaks <- c(-Inf,sort(as.numeric(names(table(subset))))) |
|
121 |
+ x <- seq(-4,4,0.1) |
|
122 |
+ title <- paste("Transformed emission density for state ",state, sep="") |
|
123 |
+ ggplt <- ggplot() + geom_histogram(data=data.frame(ureads=subset), aes(x=ureads, y=..density..), breaks=breaks, right=TRUE, col='black', fill=cols[state+1]) + theme_bw() + geom_line(data=data.frame(x=x, y=dnorm(x, mean=0, sd=1)), aes(x=x, y=y)) + xlab("transformed reads") + ylim(0,0.5) + labs(title=title) |
|
124 |
+ return(ggplt) |
|
125 |
+ |
|
126 |
+} |
|
127 |
+ |
|
128 |
+# ------------------------------------------------------------ |
|
129 |
+# Plot a boxplot of the univariate calls |
|
130 |
+# ------------------------------------------------------------ |
|
131 |
+plot.boxplot <- function(model) { |
|
132 |
+ |
|
133 |
+ ## Load libraries |
|
134 |
+# library(ggplot2) |
|
135 |
+ |
|
136 |
+ ## Plot settings |
|
137 |
+ cols <- c("unmodified"="gray48","modified"="orangered3") |
|
138 |
+ |
|
139 |
+ ## Boxplot |
|
140 |
+ components <- c("unmodified","modified")[as.factor(get.states(model))] |
|
141 |
+ df <- data.frame(component=components, reads=model$reads) |
|
142 |
+ ggplt <- ggplot() + theme_bw() + geom_boxplot(data=df, aes(x=component, y=reads, fill=component)) + scale_fill_manual(values=cols) |
|
143 |
+ return(ggplt) |
|
144 |
+ |
|
145 |
+} |
|
146 |
+ |
0 | 147 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,138 @@ |
1 |
+simulate.univariate = function(coordinates, transition, emission, initial=1) { |
|
2 |
+ |
|
3 |
+ # Calculate some variables |
|
4 |
+ numstates = ncol(transition) |
|
5 |
+ if (numstates!=3) { |
|
6 |
+ stop("The transition matrix is expected to have 3 columns and 3 rows") |
|
7 |
+ } |
|
8 |
+ numbins = nrow(coordinates) |
|
9 |
+ |
|
10 |
+ ## Make state vector from transition matrix |
|
11 |
+ # Generate sample of random numbers |
|
12 |
+ rsample = runif(numbins,0,1) |
|
13 |
+ # Integrate transition matrix by row and add -1 in front |
|
14 |
+ cumtransition = cbind(rep(-1,numstates), t(apply(transition, 1, cumsum))) |
|
15 |
+ # Generate the state vector by going through each state |
|
16 |
+ cat("Generating states from transition matrix...") |
|
17 |
+ states = matrix(rep(NA,numstates*numbins), ncol=numstates) |
|
18 |
+ for (irow in 1:numstates) { |
|
19 |
+ states[,irow] = findInterval(rsample, cumtransition[irow,], rightmost.closed=TRUE) |
|
20 |
+ } |
|
21 |
+ statevec = rep(NA,numbins) |
|
22 |
+ statevec[1] = initial |
|
23 |
+ for (ibin in 2:numbins) { |
|
24 |
+ statevec[ibin] = states[ibin,statevec[ibin-1]] |
|
25 |
+ } |
|
26 |
+ cat(" done\n") |
|
27 |
+ |
|
28 |
+ ## Make reads from state vector and distributions |
|
29 |
+ # Generate the read counts by drawing from distribution |
|
30 |
+ cat("Generating reads from emission parameters and states...") |
|
31 |
+ reads = rep(NA, numbins) |
|
32 |
+ numbins.in.state = aggregate(rep(1,length(statevec)), list(state=statevec), sum) |
|
33 |
+ reads[statevec==1] = 0 |
|
34 |
+ if (!is.na(numbins.in.state[2,'x'])) { |
|
35 |
+ reads[statevec==2] = rnbinom(numbins.in.state[2,'x'], size=emission[2,'r'], prob=emission[2,'p']) |
|
36 |
+ } |
|
37 |
+ if (!is.na(numbins.in.state[3,'x'])) { |
|
38 |
+ reads[statevec==3] = rnbinom(numbins.in.state[3,'x'], size=emission[3,'r'], prob=emission[3,'p']) |
|
39 |
+ } |
|
40 |
+ cat(" done\n") |
|
41 |
+ |
|
42 |
+ # Return the output |
|
43 |
+ out = list(coordinates = coordinates, |
|
44 |
+ states = statevec, |
|
45 |
+ reads = reads, |
|
46 |
+ transition = transition, |
|
47 |
+ emission = emission |
|
48 |
+ ) |
|
49 |
+ return(out) |
|
50 |
+ |
|
51 |
+} |
|
52 |
+ |
|
53 |
+ |
|
54 |
+ |
|
55 |
+simulate.multivariate = function(coordinates, transition, emissions, weights, sigma, use.states, initial=1) { |
|
56 |
+ |
|
57 |
+ lib = require(mvtnorm) |
|
58 |
+ if (lib == FALSE) { |
|
59 |
+ install.packages("mvtnorm") |
|
60 |
+ library(mvtnorm) |
|
61 |
+ } |
|
62 |
+ |
|
63 |
+ # Calculate some variables |
|
64 |
+ numstates = ncol(transition) |
|
65 |
+ numbins = nrow(coordinates) |
|
66 |
+ nummod = length(emissions) |
|
67 |
+ |
|
68 |
+ ## Make state vector from transition matrix |
|
69 |
+ # Generate sample of random numbers |
|
70 |
+ rsample = runif(numbins,0,1) |
|
71 |
+ # Integrate transition matrix by row and add -1 in front |
|
72 |
+ cumtransition = cbind(rep(-1,numstates), t(apply(transition, 1, cumsum))) |
|
73 |
+ # Generate the state vector by going through each state |
|
74 |
+ cat("Generating states from transition matrix...") |
|
75 |
+ states = matrix(rep(NA,numstates*numbins), ncol=numstates) |
|
76 |
+ for (irow in 1:numstates) { |
|
77 |
+ states[,irow] = findInterval(rsample, cumtransition[irow,], rightmost.closed=TRUE) |
|
78 |
+ } |
|
79 |
+ statevec = rep(NA,numbins) |
|
80 |
+ statevec[1] = initial |
|
81 |
+ for (ibin in 2:numbins) { |
|
82 |
+ statevec[ibin] = states[ibin,statevec[ibin-1]] |
|
83 |
+ } |
|
84 |
+ # Replace the states by combinatorial states |
|
85 |
+ statevec = use.states[statevec] |
|
86 |
+ cat(" done\n") |
|
87 |
+ |
|
88 |
+ ## Make reads from state vector and emission distributions |
|
89 |
+ reads = matrix(rep(NA, nummod*numbins), ncol=nummod) |
|
90 |
+ |
|
91 |
+ rs = unlist(lapply(emissions, "[", 2:3, 'r')) |
|
92 |
+ ps = unlist(lapply(emissions, "[", 2:3, 'p')) |
|
93 |
+ ws = unlist(lapply(weights, "[", 1)) |
|
94 |
+ for (istate in use.states) { |
|
95 |
+ cat("Generating reads for state ",istate,"\n") |
|
96 |
+ i = which(use.states==istate) |
|
97 |
+ |
|
98 |
+ # Convert istate to binary representation |
|
99 |
+ binary_state = rev(as.integer(intToBits(istate))[1:nummod]) |
|
100 |
+ binary_stateTF = unlist(list(c(TRUE,FALSE), c(FALSE,TRUE))[binary_state+1]) |
|
101 |
+ |
|
102 |
+ # Draw from the multivariate normal |
|
103 |
+ n = length(which(statevec==istate)) |
|
104 |
+ if (n == 0) next |
|
105 |
+ cat("drawing from multivariate normal... \r") |
|
106 |
+ z = matrix(rmvnorm(n, mean=rep(0,nummod), sigma=sigma[,,i]), ncol=nummod) |
|
107 |
+ # Transform to uniform space |
|
108 |
+ cat("transforming to uniform space... \r") |
|
109 |
+ u = matrix(apply(z, 2, pnorm), ncol=nummod) |
|
110 |
+ # Transform to count space using marginals |
|
111 |
+ cat("transform to count space... \r") |
|
112 |
+ irs = rs[binary_stateTF] |
|
113 |
+ ips = ps[binary_stateTF] |
|
114 |
+ for (imod in 1:nummod) { |
|
115 |
+ mask = statevec==istate |
|
116 |
+ if (binary_state[imod] == 0) { |
|
117 |
+ reads[mask,imod] = qzinbinom(u[,imod], w=ws[imod], size=irs[imod], prob=ips[imod]) |
|
118 |
+ } else { |
|
119 |
+ reads[mask,imod] = qnbinom(u[,imod], size=irs[imod], prob=ips[imod]) |
|
120 |
+ } |
|
121 |
+ } |
|
122 |
+ cat(" \r") |
|
123 |
+ |
|
124 |
+ } |
|
125 |
+ |
|
126 |
+ # Return the output |
|
127 |
+ out = list(coordinates = coordinates, |
|
128 |
+ states = statevec, |
|
129 |
+ reads = reads, |
|
130 |
+ emissions = emissions, |
|
131 |
+ transition = transition, |
|
132 |
+ sigma = sigma, |
|
133 |
+ use.states = use.states, |
|
134 |
+ weights = weights |
|
135 |
+ ) |
|
136 |
+ return(out) |
|
137 |
+ |
|
138 |
+} |
0 | 139 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,165 @@ |
1 |
+univariate.from.binned.data = function(binned.data, eps=0.001, max.time=-1, max.it=-1, num.trials=1, eps.try=NULL, num.threads=1, output.if.not.converged=FALSE, filter.reads=TRUE) { |
|
2 |
+ |
|
3 |
+ ## Intercept user input |
|
4 |
+ if (check.positive(eps)!=0) stop("argument 'eps' expects a positive numeric") |
|
5 |
+ if (check.integer(max.time)!=0) stop("argument 'max.time' expects an integer") |
|
6 |
+ if (check.integer(max.it)!=0) stop("argument 'max.it' expects an integer") |
|
7 |
+ if (check.positive.integer(num.trials)!=0) stop("argument 'num.trials' expects a positive integer") |
|
8 |
+ if (!is.null(eps.try)) { |
|
9 |
+ if (check.positive(eps.try)!=0) stop("argument 'eps.try' expects a positive numeric") |
|
10 |
+ } |
|
11 |
+ if (check.positive.integer(num.threads)!=0) stop("argument 'num.threads' expects a positive integer") |
|
12 |
+ if (check.logical(output.if.not.converged)!=0) stop("argument 'output.if.not.converged' expects a logical (TRUE or FALSE)") |
|
13 |
+ |
|
14 |
+ |
|
15 |
+ war = NULL |
|
16 |
+ if (is.null(eps.try)) eps.try = eps |
|
17 |
+ |
|
18 |
+ coordinatenames = c("chrom","start","end","reads") |
|
19 |
+ names(binned.data) = coordinatenames |
|
20 |
+ |
|
21 |
+ ## Assign variables |
|
22 |
+ # number of states |
|
23 |
+ numstates = 3 |
|
24 |
+ # prepare densities to use |
|
25 |
+ densityNames = c("ZI", "NB", "NB") |
|
26 |
+ enum = c("ZI","NB","ZINB","Other") |
|
27 |
+ densityNames = factor(densityNames, levels=enum) |
|
28 |
+ densityNames = as.numeric(densityNames) - 1 |
|
29 |
+ # length of observation sequence |
|
30 |
+ numbins = length(binned.data$reads) |
|
31 |
+ |
|
32 |
+ # Check if there are reads in the data, otherwise HMM will blow up |
|
33 |
+ if (!any(binned.data$reads!=0)) { |
|
34 |
+ stop("All reads in data are zero. No univariate HMM done.") |
|
35 |
+ } |
|
36 |
+ |
|
37 |
+ # Filter high reads out, makes HMM faster |
|
38 |
+ if (filter.reads) { |
|
39 |
+ limit <- 10*ceiling(var(binned.data$reads)) |
|
40 |
+ mask <- binned.data$reads > limit |
|
41 |
+ binned.data$reads[mask] <- limit |
|
42 |
+ numfiltered <- length(which(mask)) |
|
43 |
+ if (numfiltered > 0) { |
|
44 |
+ warning(paste("There are very high read counts (probably artificial) in your data. Replaced read counts > ",limit," (10*variance) by ",limit," in ",numfiltered," bins. Set option 'filter.reads=FALSE' to disable this filtering.", sep="")) |
|
45 |
+ } |
|
46 |
+ } |
|
47 |
+ |
|
48 |
+ |
|
49 |
+ ## Call univariate in a for loop to enable multiple trials |
|
50 |
+ modellist = list() |
|
51 |
+ for (i_try in 1:num.trials) { |
|
52 |
+ cat("\n\nTry ",i_try," of ",num.trials," ------------------------------\n") |
|
53 |
+ z = .C("R_univariate_hmm", |
|
54 |
+ as.integer(binned.data$reads), # double* O |
|
55 |
+ as.integer(numbins), # int* T |
|
56 |
+ as.integer(numstates), # int* N |
|
57 |
+ as.integer(densityNames), # I pass them but they are fixed |
|
58 |
+ double(length=numstates), # double* r |
|
59 |
+ double(length=numstates), # double* p |
|
60 |
+ as.integer(max.it), # int* maxiter |
|
61 |
+ as.integer(max.time), # double* maxtime |
|
62 |
+ as.double(eps.try), # double* eps |
|
63 |
+ double(length=numbins * numstates), # double* post |
|
64 |
+ double(length=numstates*numstates), # double* A |
|
65 |
+ double(length=numstates), # double* proba |
|
66 |
+ double(length=1), # double* loglik |
|
67 |
+ double(length=numstates), # double* softweights |
|
68 |
+ double(length=numstates), # double* initial r |
|
69 |
+ double(length=numstates), # double* initial p |
|
70 |
+ double(length=numstates*numstates), # double* initial A |
|
71 |
+ double(length=numstates), # double* initial proba |
|
72 |
+ as.logical(0), # bool* use_initial_params |
|
73 |
+ as.integer(num.threads) # int* num_threads |
|
74 |
+ ) |
|
75 |
+ |
|
76 |
+ model = NULL |
|
77 |
+ model$loglik = z[[13]] |
|
78 |
+ model$iteration = z[[7]] |
|
79 |
+ model$time.in.sec = z[[8]] |
|
80 |
+ model$delta.loglik = z[[9]] |
|
81 |
+ model$epsilon = eps.try |
|
82 |
+ model$proba = z[[12]] |
|
83 |
+ model$A = matrix(z[[11]], ncol=numstates, byrow=TRUE) |
|
84 |
+ model$distributions = cbind(r=z[[5]], p=z[[6]]) |
|
85 |
+ model$softweights = z[[14]] |
|
86 |
+ model$proba.initial = z[[18]] |
|
87 |
+ model$A.initial = matrix(z[[17]], ncol=numstates, byrow=TRUE) |
|
88 |
+ model$distributions.initial = cbind(r=z[[15]], p=z[[16]]) |
|
89 |
+ model$num.threads = z[[20]] |
|
90 |
+ if (num.trials > 1) { |
|
91 |
+ if (model$delta.loglik > model$epsilon) { |
|
92 |
+ warning("HMM did not converge in trial run ",i_try,"!\n") |
|
93 |
+ } |
|
94 |
+ # Store model in list |
|
95 |
+ modellist[[i_try]] = model |
|
96 |
+ } |
|
97 |
+ } |
|
98 |
+ |
|
99 |
+ if (num.trials > 1) { |
|
100 |
+ |
|
101 |
+ # Select fit with best loglikelihood |
|
102 |
+ indexmax = which.max(unlist(lapply(modellist,"[[","loglik"))) |
|
103 |
+ model = modellist[[indexmax]] |
|
104 |
+ |
|
105 |
+ # Rerun the HMM with different epsilon and initial parameters from trial run |
|
106 |
+ cat("\n\nRerunning try ",indexmax," with eps =",eps,"--------------------\n") |
|
107 |
+ z = .C("R_univariate_hmm", |
|
108 |
+ as.integer(binned.data$reads), # double* O |
|
109 |
+ as.integer(numbins), # int* T |
|
110 |
+ as.integer(numstates), # int* N |
|
111 |
+ as.integer(densityNames), # I pass them but they are fixed |
|
112 |
+ double(length=numstates), # double* r |
|
113 |
+ double(length=numstates), # double* p |
|
114 |
+ as.integer(max.it), # int* maxiter |
|
115 |
+ as.integer(max.time), # double* maxtime |
|
116 |
+ as.double(eps), # double* eps |
|
117 |
+ double(length=numbins * numstates), # double* post |
|
118 |
+ double(length=numstates*numstates), # double* A |
|
119 |
+ double(length=numstates), # double* proba |
|
120 |
+ double(length=1), # double* loglik |
|
121 |
+ double(length=numstates), # double* softweights |
|
122 |
+ as.vector(model$distributions[,'r']), # double* initial r |
|
123 |
+ as.vector(model$distributions[,'p']), # double* initial p |
|
124 |
+ as.vector(model$A), # double* initial A |
|
125 |
+ as.vector(model$proba), # double* initial proba |
|
126 |
+ as.logical(1), # bool* use_initial_params |
|
127 |
+ as.integer(num.threads) # int* num_threads |
|
128 |
+ ) |
|
129 |
+ } |
|
130 |
+ |
|
131 |
+ model = NULL |
|
132 |
+ model$coordinates = binned.data[,coordinatenames[1:3]] |
|
133 |
+ model$reads = binned.data$reads |
|
134 |
+ model$posteriors = matrix(z[[10]], ncol=numstates) |
|
135 |
+ colnames(model$posteriors) = c("P(state = zero inflation)","P(state = unmodified)","P(state = modified)") |
|
136 |
+ class(model) = "chromStar.univariate.model" |
|
137 |
+ model$states = get.states(model) |
|
138 |
+ model$states.with.zeroinflation = get.states(model, separate.zeroinflation=TRUE) |
|
139 |
+ model$loglik = z[[13]] |
|
140 |
+ model$iteration = z[[7]] |
|
141 |
+ model$time.in.sec = z[[8]] |
|
142 |
+ model$delta.loglik = z[[9]] |
|
143 |
+ model$epsilon = eps |
|
144 |
+ model$proba = z[[12]] |
|
145 |
+ model$A = matrix(z[[11]], ncol=numstates, byrow=TRUE) |
|
146 |
+ model$distributions = cbind(r=z[[5]], p=z[[6]]) |
|
147 |
+ model$softweights = z[[14]] |
|
148 |
+ model$proba.initial = z[[18]] |
|
149 |
+ model$A.initial = matrix(z[[17]], ncol=numstates, byrow=TRUE) |
|
150 |
+ model$distributions.initial = cbind(r=z[[15]], p=z[[16]]) |
|
151 |
+ model$num.threads = z[[20]] |
|
152 |
+ if (num.trials == 1) { |
|
153 |
+ if (model$delta.loglik > model$epsilon) { |
|
154 |
+ war = warning("HMM did not converge!\n") |
|
155 |
+ } |
|
156 |
+ } |
|
157 |
+ |
|
158 |
+ if (!is.null(war)) { |
|
159 |
+ if (output.if.not.converged == TRUE) { |
|
160 |
+ return(model) |
|
161 |
+ } |
|
162 |
+ } else { |
|
163 |
+ return(model) |
|
164 |
+ } |
|
165 |
+} |
0 | 166 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,66 @@ |
1 |
+dzinbinom = function(x, w, size, prob, mu) { |
|
2 |
+ if (w < 0 || w > 1) { |
|
3 |
+ warning("NAs returned, w needs to be between 0 and 1") |
|
4 |
+ return(rep(NA, length(x))) |
|
5 |
+ } |
|
6 |
+ regular.density = dnbinom(x, size=size, prob=prob, mu=mu) |
|
7 |
+ zi = rep(0, length(x)) |
|
8 |
+ zi[x==0] = w |
|
9 |
+ density = zi + (1-w)*regular.density |
|
10 |
+ |
|
11 |
+ return(density) |
|
12 |
+} |
|
13 |
+ |
|
14 |
+pzinbinom = function(q, w, size, prob, mu, lower.tail=TRUE) { |
|
15 |
+ if (w < 0 || w > 1) { |
|
16 |
+ warning("NAs returned, w needs to be between 0 and 1") |
|
17 |
+ return(rep(NA, length(q))) |
|
18 |
+ } |
|
19 |
+ # Use stepfun to make also non-integer values possible |
|
20 |
+ x = 0:max(q) |
|
21 |
+ cdf = stepfun(x, c(0, cumsum(dzinbinom(x, w, size, prob, mu)))) |
|
22 |
+ p = cdf(q) |
|
23 |
+ p[p>1] = 1 |
|
24 |
+ |
|
25 |
+ if (lower.tail==TRUE) { |
|
26 |
+ return(p) |
|
27 |
+ } else { |
|
28 |
+ return(1-p) |
|
29 |
+ } |
|
30 |
+} |
|
31 |
+ |
|
32 |
+qzinbinom = function(p, w, size, prob, mu, lower.tail=TRUE) { |
|
33 |
+ if (w < 0 || w > 1) { |
|
34 |
+ warning("NAs returned, w needs to be between 0 and 1") |
|
35 |
+ return(rep(NA, length(q))) |
|
36 |
+ } |
|
37 |
+ x = 0:5000 |
|
38 |
+ cdf = pzinbinom(x, w, size, prob, mu) |
|
39 |
+ inv.cdf = stepfun(cdf[-length(cdf)], x) |
|
40 |
+ q = inv.cdf(p) |
|
41 |
+ |
|
42 |
+ if (lower.tail==TRUE) { |
|
43 |
+ return(q) |
|
44 |
+ } else { |
|
45 |
+ return(rev(q)) |
|
46 |
+ } |
|
47 |
+} |
|
48 |
+ |
|
49 |
+rzinbinom = function(n, w, size, prob, mu) { |
|
50 |
+ if (w < 0 || w > 1) { |
|
51 |
+ warning("NAs returned, w needs to be between 0 and 1") |
|
52 |
+ return(rep(NA, length(q))) |
|
53 |
+ } |
|
54 |
+ r = rnbinom(n, size=size, prob=prob, mu=mu) |
|
55 |
+ if (length(n)==1) { |
|
56 |
+ i = sample(n, round(w * n)) |
|
57 |
+ if (n < 10) warning("n < 10: Random numbers may not be reliable") |
|
58 |
+ } else { |
|
59 |
+ i = sample(n, round(w * length(n))) |
|
60 |
+ if (length(n) < 10) warning("length(n) < 10: Random numbers may not be reliable") |
|
61 |
+ } |
|
62 |
+ r[i] = 0 |
|
63 |
+ |
|
64 |
+ return(r) |
|
65 |
+} |
|
66 |
+ |
0 | 67 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,436 @@ |
1 |
+10 |
|
2 |
+ |
|
3 |
+dir |
|
4 |
+821 |
|
5 |
+svn+ssh://maria@141.80.187.138/home/histoneHMM/svn/histoneHMM/branches/groningen/Rpackage/histoneHMM/man |
|
6 |
+svn+ssh://maria@141.80.187.138/home/histoneHMM/svn |
|
7 |
+ |
|
8 |
+ |
|
9 |
+ |
|
10 |
+2013-10-07T16:06:14.303223Z |
|
11 |
+801 |
|
12 |
+heinig |
|
13 |
+ |
|
14 |
+ |
|
15 |
+ |
|
16 |
+ |
|
17 |
+ |
|
18 |
+ |
|
19 |
+ |
|
20 |
+ |
|
21 |
+ |
|
22 |
+ |
|
23 |
+ |
|
24 |
+ |
|
25 |
+ |
|
26 |
+ |
|
27 |
+72ef0f4a-728c-0410-918a-5b46bb2d7104 |
|
28 |
+ |
|
29 |
+zinbacopula.hmm.posterior.Rd |
|
30 |
+file |
|
31 |
+ |
|
32 |
+ |
|
33 |
+ |
|
34 |
+ |
|
35 |
+2013-10-21T16:01:10.000000Z |
|
36 |
+bee76a313dfbe41bed927bf74a4227bf |
|
37 |
+2013-10-07T16:06:14.303223Z |
|
38 |
+801 |
|
39 |
+heinig |
|
40 |
+ |
|
41 |
+ |
|
42 |
+ |
|
43 |
+ |
|
44 |
+ |
|
45 |
+ |
|
46 |
+ |
|
47 |
+ |
|
48 |
+ |
|
49 |
+ |
|
50 |
+ |
|
51 |
+ |
|
52 |
+ |
|
53 |
+ |
|
54 |
+ |
|
55 |
+ |
|
56 |
+ |
|
57 |
+ |
|
58 |
+ |
|
59 |
+ |
|
60 |
+ |
|
61 |
+2424 |
|
62 |
+ |
|
63 |
+em.Rd |
|
64 |
+file |
|
65 |
+ |
|
66 |
+ |
|
67 |
+ |
|
68 |
+ |
|
69 |
+2013-10-21T16:01:10.000000Z |
|
70 |
+53932cc64b0e6b28da2d3f2f02ca9594 |
|
71 |
+2013-10-07T16:06:14.303223Z |
|
72 |
+801 |
|
73 |
+heinig |
|
74 |
+ |
|
75 |
+ |
|
76 |
+ |
|
77 |
+ |
|
78 |
+ |
|
79 |
+ |
|
80 |
+ |
|
81 |
+ |
|
82 |
+ |
|
83 |
+ |
|
84 |
+ |
|
85 |
+ |
|
86 |
+ |
|
87 |
+ |
|
88 |
+ |
|
89 |
+ |
|
90 |
+ |
|
91 |
+ |
|
92 |
+ |
|
93 |
+ |
|
94 |
+ |
|
95 |
+1631 |
|
96 |
+ |
|
97 |
+callRegions.Rd |
|
98 |
+file |
|
99 |
+ |
|
100 |
+ |
|
101 |
+ |
|
102 |
+ |
|
103 |
+2013-10-21T14:01:40.000000Z |
|
104 |
+a67e330a1954b8ebff32e3eed41f6b58 |
|
105 |
+2013-10-07T16:06:14.303223Z |
|
106 |
+801 |
|
107 |
+heinig |
|
108 |
+ |
|
109 |
+ |
|
110 |
+ |
|
111 |
+ |
|
112 |
+ |
|
113 |
+ |
|
114 |
+ |
|
115 |
+ |
|
116 |
+ |
|
117 |
+ |
|
118 |
+ |
|
119 |
+ |
|
120 |
+ |
|
121 |
+ |
|
122 |
+ |
|
123 |
+ |
|
124 |
+ |
|
125 |
+ |
|
126 |
+ |
|
127 |
+ |
|
128 |
+ |
|
129 |
+2911 |
|
130 |
+ |
|
131 |
+histoneHMM-package.Rd |
|