From: Kasper Daniel Hansen <kasperdanielhansen@gmail.com>
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@110074 bc3139a8-67e5-0310-9ffc-ced21a209358
5 | 6 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
1 |
+language: r |
|
2 |
+sudo: required |
|
3 |
+ |
|
4 |
+# Choose your operating system(s) (default: linux) |
|
5 |
+os: |
|
6 |
+ - linux |
|
7 |
+ - osx |
|
8 |
+ |
|
9 |
+# Bioconductor-specific stuff. If targetting bioc-devel then need to use |
|
10 |
+# "bioc_use_devel: true", otherwise remove or comment-out this line. |
|
11 |
+# TODO: Should --timings be added to r_check_args to mimic BioC build machines? |
|
12 |
+bioc_required: true |
|
13 |
+bioc_use_devel: true |
|
14 |
+# On CRAN all warnings are treated as errors; this is not true on Bioconductor. |
|
15 |
+warnings_are_errors: false |
|
16 |
+ |
|
17 |
+# Need this if also using covr to get code coverage |
|
18 |
+r_github_packages: |
|
19 |
+ - jimhester/covr |
|
20 |
+after_success: |
|
21 |
+ - Rscript -e 'covr::codecov()' |
|
22 |
+ |
|
23 |
+# bsseq-specific stuff |
|
24 |
+r_build_args: "--no-build-vignettes --no-manual" |
|
25 |
+# TODO: Travis is reporting an error when checking the vignette, hence the |
|
26 |
+# inclusion of --no-vignettes in r_check_args |
|
27 |
+r_check_args: "--no-vignettes" |
|
28 |
+# Must 'manually' install packages in SUGGESTS in order to run full tests |
|
29 |
+r_binary_packages: |
|
30 |
+ - RUnit |
|
31 |
+bioc_packages: |
|
32 |
+ - bsseqData |
|
33 |
+ - BiocStyle |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: bsseq |
2 |
-Version: 1.7.0 |
|
2 |
+Version: 1.7.1 |
|
3 | 3 |
Title: Analyze, manage and store bisulfite sequencing data |
4 | 4 |
Description: A collection of tools for analyzing and visualizing bisulfite |
5 | 5 |
sequencing data. |
... | ... |
@@ -10,12 +10,13 @@ Depends: |
10 | 10 |
R (>= 2.15), |
11 | 11 |
methods, |
12 | 12 |
BiocGenerics, |
13 |
- IRanges (>= 2.1.10), |
|
14 | 13 |
GenomicRanges (>= 1.19.6), |
15 | 14 |
SummarizedExperiment (>= 0.1.1), |
16 | 15 |
parallel, |
17 |
- GenomeInfoDb |
|
16 |
+ limma |
|
18 | 17 |
Imports: |
18 |
+ IRanges (>= 2.1.10), |
|
19 |
+ GenomeInfoDb, |
|
19 | 20 |
scales, |
20 | 21 |
stats, |
21 | 22 |
graphics, |
... | ... |
@@ -29,7 +30,8 @@ Imports: |
29 | 30 |
Suggests: |
30 | 31 |
RUnit, |
31 | 32 |
bsseqData, |
32 |
- BiocStyle |
|
33 |
+ BiocStyle, |
|
34 |
+ knitr |
|
33 | 35 |
Collate: |
34 | 36 |
hasGRanges.R |
35 | 37 |
BSseq_class.R |
... | ... |
@@ -47,8 +49,12 @@ Collate: |
47 | 49 |
fisher.R |
48 | 50 |
permutations.R |
49 | 51 |
BSmooth.fstat.R |
52 |
+ BSseqStat_class.R |
|
53 |
+ getStats.R |
|
50 | 54 |
License: Artistic-2.0 |
55 |
+VignetteBuilder: knitr |
|
51 | 56 |
URL: https://github.com/kasperdanielhansen/bsseq |
52 |
-BugReports: https://github.com/kasperdanielhansen/bsseq/issues |
|
53 | 57 |
LazyData: yes |
58 |
+LazyDataCompression: xz |
|
59 |
+BugReports: https://github.com/kasperdanielhansen/bsseq/issues |
|
54 | 60 |
biocViews: DNAMethylation |
... | ... |
@@ -31,6 +31,7 @@ import(S4Vectors) |
31 | 31 |
importFrom(gtools, "combinations") |
32 | 32 |
importFrom(data.table, "fread", "setnames") |
33 | 33 |
importFrom(R.utils, "isGzipped", "gunzip") |
34 |
+import(limma) |
|
34 | 35 |
|
35 | 36 |
## |
36 | 37 |
## Exporting |
... | ... |
@@ -40,6 +41,7 @@ importFrom(R.utils, "isGzipped", "gunzip") |
40 | 41 |
exportClasses("hasGRanges", |
41 | 42 |
"BSseq", |
42 | 43 |
"BSseqTstat", |
44 |
+ "BSseqStat", |
|
43 | 45 |
"matrixOrNULL") |
44 | 46 |
|
45 | 47 |
exportMethods("[", "show", |
... | ... |
@@ -64,7 +66,8 @@ export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats", |
64 | 66 |
"plotRegion", "plotManyRegions", |
65 | 67 |
"read.umtab", "read.umtab2", "read.bsmooth", "read.bismark", |
66 | 68 |
"poissonGoodnessOfFit", "binomialGoodnessOfFit", |
67 |
- "data.frame2GRanges", "BSseqTstat") |
|
69 |
+ "data.frame2GRanges", "BSseqTstat", |
|
70 |
+ "BSseqStat", "BSmooth.fstat", "smoothSds", "computeStat") |
|
68 | 71 |
|
69 | 72 |
S3method("print", "chisqGoodnessOfFit") |
70 | 73 |
S3method("plot", "chisqGoodnessOfFit") |
... | ... |
@@ -1,6 +1,46 @@ |
1 |
-BSmooth.fstat <- function(BSseq, group1, group2, estimate.var = c("same", "paired", "group2"), |
|
2 |
- local.correct = TRUE, maxGap = NULL, qSd = 0.75, k = 101, mc.cores = 1, verbose = TRUE){ |
|
3 |
- smoothSd <- function(Sds, k) { |
|
1 |
+BSmooth.fstat <- function(BSseq, design, contrasts, returnModelCoefficients = FALSE, verbose = TRUE){ |
|
2 |
+ stopifnot(is(BSseq, "BSseq")) |
|
3 |
+ stopifnot(hasBeenSmoothed(BSseq)) |
|
4 |
+ |
|
5 |
+ ## if(any(rowSums(getCoverage(BSseq)[, unlist(groups)]) == 0)) |
|
6 |
+ ## warning("Computing t-statistics at locations where there is no data; consider subsetting the 'BSseq' object first") |
|
7 |
+ |
|
8 |
+ if(verbose) cat("[BSmooth.fstat] fitting linear models ... ") |
|
9 |
+ ptime1 <- proc.time() |
|
10 |
+ allPs <- getMeth(BSseq, type = "smooth", what = "perBase", |
|
11 |
+ confint = FALSE) |
|
12 |
+ fit <- lmFit(allPs, design) |
|
13 |
+ fitC <- contrasts.fit(fit, contrasts) |
|
14 |
+ ## Need |
|
15 |
+ ## fitC$coefficients, fitC$stdev.unscaled, fitC$sigma, fitC$cov.coefficients |
|
16 |
+ ## actuall just need |
|
17 |
+ ## tstats <- fitC$coefficients / fitC$stdev.unscaled / fitC$sigma |
|
18 |
+ ## rawSds <- fitC$sigma |
|
19 |
+ ## cor.coefficients <- cov2cor(fitC$cov.coefficients) |
|
20 |
+ rawSds <- fitC$sigma |
|
21 |
+ cor.coefficients <- cov2cor(fitC$cov.coefficients) |
|
22 |
+ rawTstats <- fitC$coefficients / fitC$stdev.unscaled / fitC$sigma |
|
23 |
+ names(dimnames(rawTstats)) <- NULL |
|
24 |
+ ptime2 <- proc.time() |
|
25 |
+ stime <- (ptime2 - ptime1)[3] |
|
26 |
+ if(verbose) cat(sprintf("done in %.1f sec\n", stime)) |
|
27 |
+ |
|
28 |
+ parameters <- c(BSseq@parameters, |
|
29 |
+ list(design = design, contrasts = contrasts)) |
|
30 |
+ stats <- list(rawSds = rawSds, |
|
31 |
+ cor.coefficients = cor.coefficients, |
|
32 |
+ rawTstats = rawTstats) |
|
33 |
+ if(returnModelCoefficients) { |
|
34 |
+ stats$modelCoefficients <- fit$coefficients |
|
35 |
+ } |
|
36 |
+ out <- BSseqStat(gr = granges(BSseq), |
|
37 |
+ stats = stats, parameters = parameters) |
|
38 |
+ out |
|
39 |
+} |
|
40 |
+ |
|
41 |
+smoothSds <- function(BSseqStat, k = 101, qSd = 0.75, mc.cores = 1, |
|
42 |
+ maxGap = 10^8, verbose = TRUE) { |
|
43 |
+ smoothSd <- function(Sds, k, qSd) { |
|
4 | 44 |
k0 <- floor(k/2) |
5 | 45 |
if(all(is.na(Sds))) return(Sds) |
6 | 46 |
thresSD <- pmax(Sds, quantile(Sds, qSd, na.rm = TRUE), na.rm = TRUE) |
... | ... |
@@ -8,9 +48,65 @@ BSmooth.fstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire |
8 | 48 |
sSds <- as.vector(runmean(Rle(c(addSD, thresSD, addSD)), k = k)) |
9 | 49 |
sSds |
10 | 50 |
} |
11 |
- compute.correction <- function(idx, qSd = 0.75) { |
|
12 |
- xx <- start(BSseq)[idx] |
|
13 |
- yy <- tstat[idx] |
|
51 |
+ if(is.null(maxGap)) |
|
52 |
+ maxGap <- BSseqStat@parameters[["maxGap"]] |
|
53 |
+ if(is.null(maxGap)) |
|
54 |
+ stop("need to set argument 'maxGap'") |
|
55 |
+ if(verbose) cat("[smoothSds] preprocessing ... ") |
|
56 |
+ ptime1 <- proc.time() |
|
57 |
+ clusterIdx <- makeClusters(granges(BSseqStat), maxGap = maxGap) |
|
58 |
+ ptime2 <- proc.time() |
|
59 |
+ stime <- (ptime2 - ptime1)[3] |
|
60 |
+ if(verbose) cat(sprintf("done in %.1f sec\n", stime)) |
|
61 |
+ smoothSds <- do.call("c", |
|
62 |
+ mclapply(clusterIdx, function(idx) { |
|
63 |
+ smoothSd(getStats(BSseqStat, what = "rawSds")[idx], k = k, qSd = qSd) |
|
64 |
+ }, mc.cores = mc.cores)) |
|
65 |
+ if("smoothSds" %in% names(getStats(BSseqStat))) |
|
66 |
+ BSseqStat@stats[["smoothSds"]] <- stat |
|
67 |
+ else |
|
68 |
+ BSseqStat@stats <- c(getStats(BSseqStat), list(smoothSds = smoothSds)) |
|
69 |
+ BSseqStat |
|
70 |
+} |
|
71 |
+ |
|
72 |
+ |
|
73 |
+computeStat <- function(BSseqStat, coef = NULL) { |
|
74 |
+ stopifnot(is(BSseqStat, "BSseqStat")) |
|
75 |
+ if(is.null(coef)) { |
|
76 |
+ coef <- 1:ncol(BSseqStat$rawTstats) |
|
77 |
+ } |
|
78 |
+ tstats <- getStats(BSseqStat, what = "rawTstats")[, coef, drop = FALSE] |
|
79 |
+ tstats <- tstats * getStats(BSseqStat, what = "rawSds") / |
|
80 |
+ getStats(BSseqStat, what = "smoothSds") |
|
81 |
+ if(length(coef) > 1) { |
|
82 |
+ cor.coefficients <- getStats(BSseqStat, what = "cor.coefficients")[coef,coef] |
|
83 |
+ stat <- as.numeric(classifyTestsF(tstats, cor.coefficients, |
|
84 |
+ fstat.only = TRUE)) |
|
85 |
+ stat.type <- "fstat" |
|
86 |
+ } else { |
|
87 |
+ stat <- as.numeric(tstats) |
|
88 |
+ stat.type <- "tstat" |
|
89 |
+ } |
|
90 |
+ if("stat" %in% names(getStats(BSseqStat))) { |
|
91 |
+ BSseqStat@stats[["stat"]] <- stat |
|
92 |
+ BSseqStat@stats[["stat.type"]] <- stat.type |
|
93 |
+ } else { |
|
94 |
+ BSseqStat@stats <- c(getStats(BSseqStat), |
|
95 |
+ list(stat = stat, stat.type = stat.type)) |
|
96 |
+ } |
|
97 |
+ BSseqStat |
|
98 |
+} |
|
99 |
+ |
|
100 |
+localCorrectStat <- function(BSseqStat, threshold = c(-15,15), mc.cores = 1, verbose = TRUE) { |
|
101 |
+ compute.correction <- function(idx) { |
|
102 |
+ xx <- start(BSseqTstat)[idx] |
|
103 |
+ yy <- tstat[idx] ## FIXME |
|
104 |
+ if(!is.null(threshold)) { |
|
105 |
+ stopifnot(is.numeric(threshold) && length(threshold) == 2) |
|
106 |
+ stopifnot(threshold[1] < 0 && threshold[2] > 0) |
|
107 |
+ yy[yy < threshold[1]] <- threshold[1] |
|
108 |
+ yy[yy > threshold[2]] <- threshold[2] |
|
109 |
+ } |
|
14 | 110 |
suppressWarnings({ |
15 | 111 |
drange <- diff(range(xx, na.rm = TRUE)) |
16 | 112 |
}) |
... | ... |
@@ -24,115 +120,19 @@ BSmooth.fstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire |
24 | 120 |
correction <- predict(fit, newdata = data.frame(xx.reg = xx)) |
25 | 121 |
yy - correction |
26 | 122 |
} |
27 |
- |
|
28 |
- estimate.var <- match.arg(estimate.var) |
|
29 |
- stopifnot(is(BSseq, "BSseq")) |
|
30 |
- stopifnot(hasBeenSmoothed(BSseq)) |
|
31 |
- if(is.character(group1)) { |
|
32 |
- stopifnot(all(group1 %in% sampleNames(BSseq))) |
|
33 |
- group1 <- match(group1, sampleNames(BSseq)) |
|
34 |
- } |
|
35 |
- if(is.numeric(group1)) { |
|
36 |
- stopifnot(min(group1) >= 1 & max(group1) <= ncol(BSseq)) |
|
37 |
- } else stop("problems with argument 'group1'") |
|
38 |
- if(is.character(group2)) { |
|
39 |
- stopifnot(all(group2 %in% sampleNames(BSseq))) |
|
40 |
- group2 <- match(group2, sampleNames(BSseq)) |
|
41 |
- } |
|
42 |
- if(is.numeric(group2)) { |
|
43 |
- stopifnot(min(group2) >= 1 & max(group2) <= ncol(BSseq)) |
|
44 |
- } else stop("problems with argument 'group2'") |
|
45 |
- stopifnot(length(intersect(group1, group2)) == 0) |
|
46 |
- stopifnot(length(group1) > 0) |
|
47 |
- stopifnot(length(group2) > 0) |
|
48 |
- stopifnot(length(group1) + length(group2) >= 3) |
|
49 |
- if(estimate.var == "paired") |
|
50 |
- stopifnot(length(group1) == length(group2)) |
|
51 |
- |
|
52 |
- if(any(rowSums(getCoverage(BSseq)[, c(group1, group2)]) == 0)) |
|
53 |
- warning("Computing f-statistics at locations where there is no data; consider subsetting the 'BSseq' object first") |
|
54 |
- |
|
55 |
- if(is.null(maxGap)) |
|
56 |
- maxGap <- BSseq@parameters$maxGap |
|
57 |
- if(is.null(maxGap)) |
|
58 |
- stop("need to set argument 'maxGap'") |
|
59 |
- |
|
60 |
- if(verbose) cat("[BSmooth.fstat] preprocessing ... ") |
|
123 |
+ maxGap <- BSseqStat$parameters$maxGap |
|
124 |
+ if(verbose) cat("[BSmooth.tstat] preprocessing ... ") |
|
61 | 125 |
ptime1 <- proc.time() |
62 |
- clusterIdx <- makeClusters(BSseq, maxGap = maxGap) |
|
126 |
+ clusterIdx <- makeClusters(BSseqStat$gr, maxGap = maxGap) |
|
63 | 127 |
ptime2 <- proc.time() |
64 | 128 |
stime <- (ptime2 - ptime1)[3] |
65 | 129 |
if(verbose) cat(sprintf("done in %.1f sec\n", stime)) |
66 |
- |
|
67 |
- if(verbose) cat("[BSmooth.fstat] computing stats within groups ... ") |
|
68 |
- ptime1 <- proc.time() |
|
69 |
- allPs <- getMeth(BSseq, type = "smooth", what = "perBase", |
|
70 |
- confint = FALSE) |
|
71 |
- group1.means <- rowMeans(allPs[, group1, drop = FALSE], na.rm = TRUE) |
|
72 |
- group2.means <- rowMeans(allPs[, group2, drop = FALSE], na.rm = TRUE) |
|
73 |
- ptime2 <- proc.time() |
|
74 |
- stime <- (ptime2 - ptime1)[3] |
|
75 |
- if(verbose) cat(sprintf("done in %.1f sec\n", stime)) |
|
76 |
- |
|
77 |
- if(verbose) cat("[BSmooth.fstat] computing stats across groups ... ") |
|
78 |
- ptime1 <- proc.time() |
|
79 |
- switch(estimate.var, |
|
80 |
- "group2" = { |
|
81 |
- rawSds <- rowSds(allPs[, group2, drop = FALSE], na.rm = TRUE) |
|
82 |
- smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) { |
|
83 |
- smoothSd(rawSds[idx], k = k) |
|
84 |
- }, mc.cores = mc.cores)) |
|
85 |
- scale <- sqrt(1/length(group1) + 1/length(group2)) |
|
86 |
- tstat.sd <- smoothSds * scale |
|
87 |
- }, |
|
88 |
- "same" = { |
|
89 |
- rawSds <- sqrt( ((length(group1) - 1) * rowVars(allPs[, group1, drop = FALSE]) + |
|
90 |
- (length(group2) - 1) * rowVars(allPs[, group2, drop = FALSE])) / |
|
91 |
- (length(group1) + length(group2) - 2)) |
|
92 |
- smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) { |
|
93 |
- smoothSd(rawSds[idx], k = k) |
|
94 |
- }, mc.cores = mc.cores)) |
|
95 |
- scale <- sqrt(1/length(group1) + 1/length(group2)) |
|
96 |
- tstat.sd <- smoothSds * scale |
|
97 |
- }, |
|
98 |
- "paired" = { |
|
99 |
- rawSds <- rowSds(allPs[, group1, drop = FALSE] - allPs[, group2, drop = FALSE]) |
|
100 |
- smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) { |
|
101 |
- smoothSd(rawSds[idx], k = k) |
|
102 |
- }, mc.cores = mc.cores)) |
|
103 |
- scale <- sqrt(1/length(group1)) |
|
104 |
- tstat.sd <- smoothSds * scale |
|
105 |
- }) |
|
106 |
- tstat <- (group1.means - group2.means) / tstat.sd |
|
107 |
- is.na(tstat)[tstat.sd == 0] <- TRUE |
|
108 |
- if(local.correct) { |
|
109 |
- tstat.corrected <- do.call(c, mclapply(clusterIdx, |
|
110 |
- compute.correction, qSd = qSd, |
|
111 |
- mc.cores = mc.cores)) |
|
112 |
- } |
|
113 |
- ptime2 <- proc.time() |
|
114 |
- stime <- (ptime2 - ptime1)[3] |
|
115 |
- if(verbose) cat(sprintf("done in %.1f sec\n", stime)) |
|
116 |
- |
|
117 |
- if(local.correct) { |
|
118 |
- stats <- cbind(rawSds, tstat.sd, group2.means, group1.means, |
|
119 |
- tstat, tstat.corrected) |
|
120 |
- colnames(stats) <- c("rawSds", "tstat.sd", |
|
121 |
- "group2.means", "group1.means", "tstat", |
|
122 |
- "tstat.corrected") |
|
123 |
- |
|
124 |
- } else { |
|
125 |
- stats <- cbind(rawSds, tstat.sd, group2.means, group1.means, |
|
126 |
- tstat) |
|
127 |
- colnames(stats) <- c("rawSds", "tstat.sd", |
|
128 |
- "group2.means", "group1.means", "tstat") |
|
129 |
- } |
|
130 |
- |
|
131 |
- parameters <- c(BSseq@parameters, |
|
132 |
- list(tstatText = sprintf("BSmooth.fstat (local.correct = %s, maxGap = %d)", |
|
133 |
- local.correct, maxGap), |
|
134 |
- group1 = group1, group2 = group2, k = k, qSd = qSd, |
|
135 |
- local.correct = local.correct, maxGap = maxGap)) |
|
136 |
- out <- BSseqTstat(gr = granges(BSseq), stats = stats, parameters = parameters) |
|
137 |
- out |
|
130 |
+ stat <- BSseqStat$stat |
|
131 |
+ stat.corrected <- do.call(c, mclapply(clusterIdx, compute.correction, |
|
132 |
+ mc.cores = mc.cores)) |
|
133 |
+ BSseqTstat@stats <- cbind(getStats(BSseqTstat), |
|
134 |
+ "tstat.corrected" = stat.corrected) |
|
135 |
+ BSseqTstat@parameters$local.local <- TRUE |
|
136 |
+ BSseqTstat |
|
138 | 137 |
} |
138 |
+ |
139 | 139 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,92 @@ |
1 |
+setClass("BSseqStat", contains = "hasGRanges", |
|
2 |
+ representation(stats = "list", |
|
3 |
+ parameters = "list") |
|
4 |
+ ) |
|
5 |
+ |
|
6 |
+setValidity("BSseqStat", function(object) { |
|
7 |
+ msg <- NULL |
|
8 |
+ if(is.null(names(object@stats)) || any(names(object@stats) == "") || |
|
9 |
+ anyDuplicated(names(object@stats))) |
|
10 |
+ msg <- validMsg(msg, "the 'stats' list needs to be named with unique names.") |
|
11 |
+ for(name in c("rawSds", "smoothsSds", "stat")) { |
|
12 |
+ if(name %in% names(object@stats) && !is.vector(object@stats[[name]])) |
|
13 |
+ msg <- validMsg(msg, sprintf("component '%s' of slot 'stats' have to be a vector", name)) |
|
14 |
+ if(name %in% names(object@stats) && length(object@stats[[name]]) != length(object@gr)) |
|
15 |
+ msg <- validMsg(msg, sprintf("component '%s' of slot 'stats' have to have the same length as slot 'gr'", name)) |
|
16 |
+ } |
|
17 |
+ for(name in c("rawTstats")) { |
|
18 |
+ if(name %in% names(object@stats) && !is.matrix(object@stats[[name]])) |
|
19 |
+ msg <- validMsg(msg, sprintf("component '%s' of slot 'stats' have to be a matrix", name)) |
|
20 |
+ if(name %in% names(object@stats) && nrow(object@stats[[name]]) != length(object@gr)) |
|
21 |
+ msg <- validMsg(msg, sprintf("component '%s' of slot 'stats' have to have the same number of rows as slot 'gr' is long", name)) |
|
22 |
+ } |
|
23 |
+ if(is.null(msg)) TRUE else msg |
|
24 |
+}) |
|
25 |
+ |
|
26 |
+setMethod("show", signature(object = "BSseqStat"), |
|
27 |
+ function(object) { |
|
28 |
+ cat("An object of type 'BSseqStat' with\n") |
|
29 |
+ cat(" ", length(object), "methylation loci\n") |
|
30 |
+ cat("based on smoothed data:\n") |
|
31 |
+ cat(" ", object@parameters$smoothText, "\n") |
|
32 |
+ cat("with parameters\n") |
|
33 |
+ cat(" ", object@parameters$tstatText, "\n") |
|
34 |
+ }) |
|
35 |
+ |
|
36 |
+setMethod("[", "BSseqStat", function(x, i, ...) { |
|
37 |
+ if(missing(i)) |
|
38 |
+ stop("need [i] for subsetting") |
|
39 |
+ if(missing(i)) |
|
40 |
+ return(x) |
|
41 |
+ x@gr <- x@gr[i] |
|
42 |
+ x@stats <- lapply(names(x@stats), function(nam) { |
|
43 |
+ if(nam %in% c("rawTstats")) { |
|
44 |
+ stopifnot(is.matrix(x@stats[[nam]])) |
|
45 |
+ return(x@stats[[nam]][i,,drop=FALSE]) |
|
46 |
+ } |
|
47 |
+ if(nam %in% c("rawSds", "smoothSds", "stat")) { |
|
48 |
+ stopifnot(is.vector(x@stats[[nam]])) |
|
49 |
+ return(x@stats[[nam]][i]) |
|
50 |
+ } |
|
51 |
+ x@stats[[nam]] |
|
52 |
+ }) |
|
53 |
+ x |
|
54 |
+}) |
|
55 |
+ |
|
56 |
+BSseqStat <- function(gr = NULL, stats = NULL, parameters = NULL) { |
|
57 |
+ out <- new("BSseqStat") |
|
58 |
+ out@gr <- gr |
|
59 |
+ out@stats <- stats |
|
60 |
+ out@parameters <- parameters |
|
61 |
+ out |
|
62 |
+} |
|
63 |
+ |
|
64 |
+ |
|
65 |
+## summary.BSseqStat <- function(object, ...) { |
|
66 |
+## quant <- quantile(getStats(object)[, "tstat.corrected"], |
|
67 |
+## prob = c(0.0001, 0.001, 0.01, 0.5, 0.99, 0.999, 0.9999)) |
|
68 |
+## quant <- t(t(quant)) |
|
69 |
+## colnames(quant) <- "quantiles" |
|
70 |
+## out <- list(quantiles = quant) |
|
71 |
+## class(out) <- "summary.BSseqStat" |
|
72 |
+## out |
|
73 |
+## } |
|
74 |
+ |
|
75 |
+## print.summary.BSseqStat <- function(x, ...) { |
|
76 |
+## print(as.matrix(x$quantiles)) |
|
77 |
+## } |
|
78 |
+ |
|
79 |
+## plot.BSseqStat <- function(x, y, ...) { |
|
80 |
+## tstat <- getStats(x)[, "tstat"] |
|
81 |
+## plot(density(tstat), xlim = c(-10,10), col = "blue", main = "") |
|
82 |
+## if("tstat.corrected" %in% colnames(getStats(x))) { |
|
83 |
+## tstat.cor <- getStats(x)[, "tstat.corrected"] |
|
84 |
+## lines(density(tstat.cor), col = "black") |
|
85 |
+## legend("topleft", legend = c("uncorrected", "corrected"), lty = c(1,1), |
|
86 |
+## col = c("blue", "black")) |
|
87 |
+## } else { |
|
88 |
+## legend("topleft", legend = c("uncorrected"), lty = 1, |
|
89 |
+## col = c("blue")) |
|
90 |
+## } |
|
91 |
+## } |
|
92 |
+ |
... | ... |
@@ -65,42 +65,3 @@ plot.BSseqTstat <- function(x, y, ...) { |
65 | 65 |
} |
66 | 66 |
} |
67 | 67 |
|
68 |
-getStats <- function(BSseqTstat, regions = NULL, stat = "tstat.corrected") { |
|
69 |
- stopifnot(is(BSseqTstat, "BSseqTstat")) |
|
70 |
- if(is.null(regions)) |
|
71 |
- return(BSseqTstat@stats) |
|
72 |
- if(class(regions) == "data.frame") |
|
73 |
- regions <- data.frame2GRanges(regions) |
|
74 |
- stopifnot(stat %in% colnames(BSseqTstat@stats)) |
|
75 |
- stopifnot(length(stat) == 1) |
|
76 |
- stopifnot(is(regions, "GenomicRanges")) |
|
77 |
- ov <- findOverlaps(BSseqTstat, regions) |
|
78 |
- ov.sp <- split(queryHits(ov), subjectHits(ov)) |
|
79 |
- getRegionStats <- function(idx) { |
|
80 |
- mat <- BSseqTstat@stats[idx,, drop=FALSE] |
|
81 |
- areaStat <- sum(mat[, stat]) |
|
82 |
- maxStat <- max(mat[, stat]) |
|
83 |
- c(areaStat, maxStat) |
|
84 |
- } |
|
85 |
- stats <- matrix(NA, ncol = 2, nrow = length(regions)) |
|
86 |
- colnames(stats) <- c("areaStat", "maxStat") |
|
87 |
- tmp <- lapply(ov.sp, getRegionStats) |
|
88 |
- stats[as.integer(names(tmp)),] <- do.call(rbind, tmp) |
|
89 |
- out <- as.data.frame(stats) |
|
90 |
- if(! stat %in% c("tstat.corrected", "tstat")) |
|
91 |
- return(out) |
|
92 |
- getRegionStats_ttest <- function(idx) { |
|
93 |
- mat <- BSseqTstat@stats[idx,, drop=FALSE] |
|
94 |
- group1.mean <- mean(mat[, "group1.means"]) |
|
95 |
- group2.mean <- mean(mat[, "group2.means"]) |
|
96 |
- meanDiff <- mean(mat[, "group1.means"] - mat[, "group2.means"]) |
|
97 |
- tstat.sd <- mean(mat[, "tstat.sd"]) |
|
98 |
- c(meanDiff, group1.mean, group2.mean, tstat.sd) |
|
99 |
- } |
|
100 |
- stats_ttest<- matrix(NA, ncol = 4, nrow = length(regions)) |
|
101 |
- colnames(stats_ttest) <- c("meanDiff", "group1.mean", "group2.mean", "tstat.sd") |
|
102 |
- tmp <- lapply(ov.sp, getRegionStats_ttest) |
|
103 |
- stats_ttest[as.integer(names(tmp)),] <- do.call(rbind, tmp) |
|
104 |
- out <- cbind(out, as.data.frame(stats_ttest)) |
|
105 |
- out |
|
106 |
-} |
... | ... |
@@ -1,17 +1,23 @@ |
1 |
-dmrFinder <- function(BSseqTstat, cutoff = NULL, qcutoff = c(0.025, 0.975), |
|
1 |
+dmrFinder <- function(bstat, cutoff = NULL, qcutoff = c(0.025, 0.975), |
|
2 | 2 |
maxGap = 300, stat = "tstat.corrected", verbose = TRUE) { |
3 |
+ if(is(bstat, "BSseqTstat")) { |
|
4 |
+ if(! stat %in% colnames(bstat@stats)) |
|
5 |
+ stop("'stat' needs to be a column of 'bstat@stats'") |
|
6 |
+ dmrStat <- bstat@stats[, stat] |
|
7 |
+ } |
|
8 |
+ if(is(bstat, "BSseqStat")) { |
|
9 |
+ dmrStat <- getStats(bstat, what = "stat") |
|
10 |
+ } |
|
3 | 11 |
subverbose <- max(as.integer(verbose) - 1L, 0L) |
4 |
- if(! stat %in% colnames(BSseqTstat@stats)) |
|
5 |
- stop("'stat' needs to be a column of 'BSseqTstat@stats'") |
|
6 | 12 |
if(is.null(cutoff)) |
7 |
- cutoff <- quantile(BSseqTstat@stats[, stat], qcutoff) |
|
13 |
+ cutoff <- quantile(dmrStat, qcutoff) |
|
8 | 14 |
if(length(cutoff) == 1) |
9 | 15 |
cutoff <- c(-cutoff, cutoff) |
10 |
- direction <- as.integer(BSseqTstat@stats[, stat] >= cutoff[2]) |
|
11 |
- direction[BSseqTstat@stats[, stat] <= cutoff[1]] <- -1L |
|
16 |
+ direction <- as.integer(dmrStat >= cutoff[2]) |
|
17 |
+ direction[dmrStat <= cutoff[1]] <- -1L |
|
12 | 18 |
direction[is.na(direction)] <- 0L |
13 |
- chrs <- as.character(seqnames(BSseqTstat@gr)) |
|
14 |
- positions <- start(BSseqTstat) |
|
19 |
+ chrs <- as.character(seqnames(bstat)) |
|
20 |
+ positions <- start(bstat) |
|
15 | 21 |
regions <- regionFinder3(direction, chr = chrs, positions = positions, |
16 | 22 |
maxGap = maxGap, verbose = subverbose) |
17 | 23 |
if(is.null(regions$down) && is.null(regions$up)) |
... | ... |
@@ -22,12 +28,19 @@ dmrFinder <- function(BSseqTstat, cutoff = NULL, qcutoff = c(0.025, 0.975), |
22 | 28 |
regions$width <- regions$end - regions$start + 1 |
23 | 29 |
regions$invdensity <- regions$width / regions$n |
24 | 30 |
regions$chr <- as.character(regions$chr) |
25 |
- stats <- getStats(BSseqTstat, regions, stat = stat) |
|
26 |
- regions <- cbind(regions, stats) |
|
27 |
- if(stat %in% c("tstat.corrected", "tstat")) { |
|
28 |
- regions$direction <- ifelse(regions$meanDiff > 0, "hyper", "hypo") |
|
31 |
+ if(is(bstat, "BSseqTstat")) { |
|
32 |
+ stats <- getStats(bstat, regions, stat = stat) |
|
33 |
+ regions <- cbind(regions, stats) |
|
34 |
+ if(stat %in% c("tstat.corrected", "tstat")) { |
|
35 |
+ regions$direction <- ifelse(regions$meanDiff > 0, "hyper", "hypo") |
|
36 |
+ } |
|
37 |
+ regions <- regions[order(abs(regions$areaStat), decreasing = TRUE),] |
|
38 |
+ } |
|
39 |
+ if(is(bstat, "BSseqStat")) { |
|
40 |
+ stats <- getStats(bstat, regions) |
|
41 |
+ regions <- cbind(regions, stats) |
|
42 |
+ regions <- regions[order(abs(regions$areaStat), decreasing = TRUE),] |
|
29 | 43 |
} |
30 |
- regions <- regions[order(abs(regions$areaStat), decreasing = TRUE),] |
|
31 | 44 |
regions |
32 | 45 |
} |
33 | 46 |
|
34 | 47 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,75 @@ |
1 |
+getStats <- function(bstat, regions = NULL, ...) { |
|
2 |
+ stopifnot(is(bstat, "BSseqTstat") || is(bstat, "BSseqStat")) |
|
3 |
+ if(is(bstat, "BSseqTstat")) |
|
4 |
+ return(getStats_BSseqTstat(bstat, regions = regions, ...)) |
|
5 |
+ getStats_BSseqStat(bstat, regions = regions, ...) |
|
6 |
+} |
|
7 |
+ |
|
8 |
+getStats_BSseqStat <- function(BSseqStat, regions = NULL, what = NULL) { |
|
9 |
+ stopifnot(is(BSseqStat, "BSseqStat")) |
|
10 |
+ if(!is.null(what)) { |
|
11 |
+ stopifnot(what %in% names(BSseqStat@stats)) |
|
12 |
+ return(BSseqStat@stats[[what]]) |
|
13 |
+ } |
|
14 |
+ if(is.null(regions)) |
|
15 |
+ return(BSseqStat@stats) |
|
16 |
+ ## Now we have regions and no what |
|
17 |
+ if(class(regions) == "data.frame") |
|
18 |
+ regions <- data.frame2GRanges(regions) |
|
19 |
+ ov <- findOverlaps(BSseqStat, regions) |
|
20 |
+ ov.sp <- split(queryHits(ov), subjectHits(ov)) |
|
21 |
+ ## We need areaStat and maxStat |
|
22 |
+ ## Could need averageMeth in each group? |
|
23 |
+ ## Need to have a specific design for that |
|
24 |
+ ## Could at least get contrast coefficient |
|
25 |
+ getRegionStats <- function(idx) { |
|
26 |
+ areaStat <- sum(BSseqStat@stats$stat[idx], na.rm = TRUE) |
|
27 |
+ maxStat <- max(BSseqStat@stats$stat[idx], na.rm = TRUE) |
|
28 |
+ c(areaStat, maxStat) |
|
29 |
+ } |
|
30 |
+ regionStats <- matrix(NA, ncol = 2, nrow = length(regions)) |
|
31 |
+ colnames(regionStats) <- c("areaStat", "maxStat") |
|
32 |
+ tmp <- lapply(ov.sp, getRegionStats) |
|
33 |
+ regionStats[as.integer(names(tmp)),] <- do.call(rbind, tmp) |
|
34 |
+ regionStats |
|
35 |
+} |
|
36 |
+ |
|
37 |
+getStats_BSseqTstat <- function(BSseqTstat, regions = NULL, stat = "tstat.corrected") { |
|
38 |
+ stopifnot(is(BSseqTstat, "BSseqTstat")) |
|
39 |
+ if(is.null(regions)) |
|
40 |
+ return(BSseqTstat@stats) |
|
41 |
+ if(class(regions) == "data.frame") |
|
42 |
+ regions <- data.frame2GRanges(regions) |
|
43 |
+ stopifnot(stat %in% colnames(BSseqTstat@stats)) |
|
44 |
+ stopifnot(length(stat) == 1) |
|
45 |
+ stopifnot(is(regions, "GenomicRanges")) |
|
46 |
+ ov <- findOverlaps(BSseqTstat, regions) |
|
47 |
+ ov.sp <- split(queryHits(ov), subjectHits(ov)) |
|
48 |
+ getRegionStats <- function(idx) { |
|
49 |
+ mat <- BSseqTstat@stats[idx,, drop=FALSE] |
|
50 |
+ areaStat <- sum(mat[, stat]) |
|
51 |
+ maxStat <- max(mat[, stat]) |
|
52 |
+ c(areaStat, maxStat) |
|
53 |
+ } |
|
54 |
+ stats <- matrix(NA, ncol = 2, nrow = length(regions)) |
|
55 |
+ colnames(stats) <- c("areaStat", "maxStat") |
|
56 |
+ tmp <- lapply(ov.sp, getRegionStats) |
|
57 |
+ stats[as.integer(names(tmp)),] <- do.call(rbind, tmp) |
|
58 |
+ out <- as.data.frame(stats) |
|
59 |
+ if(! stat %in% c("tstat.corrected", "tstat")) |
|
60 |
+ return(out) |
|
61 |
+ getRegionStats_ttest <- function(idx) { |
|
62 |
+ mat <- BSseqTstat@stats[idx,, drop=FALSE] |
|
63 |
+ group1.mean <- mean(mat[, "group1.means"]) |
|
64 |
+ group2.mean <- mean(mat[, "group2.means"]) |
|
65 |
+ meanDiff <- mean(mat[, "group1.means"] - mat[, "group2.means"]) |
|
66 |
+ tstat.sd <- mean(mat[, "tstat.sd"]) |
|
67 |
+ c(meanDiff, group1.mean, group2.mean, tstat.sd) |
|
68 |
+ } |
|
69 |
+ stats_ttest<- matrix(NA, ncol = 4, nrow = length(regions)) |
|
70 |
+ colnames(stats_ttest) <- c("meanDiff", "group1.mean", "group2.mean", "tstat.sd") |
|
71 |
+ tmp <- lapply(ov.sp, getRegionStats_ttest) |
|
72 |
+ stats_ttest[as.integer(names(tmp)),] <- do.call(rbind, tmp) |
|
73 |
+ out <- cbind(out, as.data.frame(stats_ttest)) |
|
74 |
+ out |
|
75 |
+} |
... | ... |
@@ -1,8 +1,8 @@ |
1 |
-getNullPermutation <- function(BSseq, idxMatrix1, idxMatrix2, |
|
2 |
- estimate.var, local.correct, |
|
3 |
- cutoff, stat, maxGap, mc.cores = 1) { |
|
1 |
+getNullDistribution_BSmooth.tstat <- function(BSseq, idxMatrix1, idxMatrix2, |
|
2 |
+ estimate.var, local.correct, |
|
3 |
+ cutoff, stat, maxGap, mc.cores = 1) { |
|
4 | 4 |
stopifnot(nrow(idxMatrix1) == nrow(idxMatrix2)) |
5 |
- cat("[getNullPermutation] performing", nrow(idxMatrix1), "permutations\n") |
|
5 |
+ message(sprintf("[getNullDistribution_BSmooth.tstat] performing %d permutations\n", nrow(idxMatrix1))) |
|
6 | 6 |
nullDist <- mclapply(1:nrow(idxMatrix1), function(ii) { |
7 | 7 |
ptime1 <- proc.time() |
8 | 8 |
BS.tstat <- BSmooth.tstat(BSseq, estimate.var = estimate.var, |
... | ... |
@@ -13,31 +13,50 @@ getNullPermutation <- function(BSseq, idxMatrix1, idxMatrix2, |
13 | 13 |
dmrs0 <- dmrFinder(BS.tstat, stat = stat, cutoff = cutoff, maxGap = maxGap) |
14 | 14 |
ptime2 <- proc.time() |
15 | 15 |
stime <- (ptime2 - ptime1)[3] |
16 |
- cat(sprintf("[getNullPermutation] completing permutation %d in %.1f sec\n", |
|
17 |
- ii, stime)) |
|
16 |
+ message(sprintf("[getNullDistribution_BSmooth.tstat] completing permutation %d in %.1f sec\n", ii, stime)) |
|
18 | 17 |
dmrs0 |
19 | 18 |
}, mc.cores = min(nrow(idxMatrix1), mc.cores), mc.preschedule = FALSE) |
20 | 19 |
nullDist |
21 | 20 |
} |
22 | 21 |
|
23 |
-getNullBlocks <- function(BSseq, idxMatrix1, idxMatrix2, estimate.var = "same", |
|
22 |
+getNullDistribution_BSmooth.fstat <- function(BSseq, idxMatrix, design, contrasts, |
|
23 |
+ coef = NULL, cutoff, maxGap.sd, maxGap.dmr, mc.cores = 1) { |
|
24 |
+ message(sprintf("[getNullDistribution_BSmooth.fstat] performing %d permutations\n", nrow(idxMatrix))) |
|
25 |
+ nullDist <- mclapply(1:nrow(idxMatrix), function(ii) { |
|
26 |
+ ptime1 <- proc.time() |
|
27 |
+ bstat <- BSmooth.fstat(BSseq[, idxMatrix[ii,]], design = design, contrasts = contrasts) |
|
28 |
+ bstat <- smoothSds(bstat, maxGap = maxGap.sd) |
|
29 |
+ bstat <- computeStat(bstat, coef = coef) |
|
30 |
+ dmrs0 <- dmrFinder(bstat, cutoff = cutoff, maxGap = maxGap.dmr) |
|
31 |
+ ptime2 <- proc.time() |
|
32 |
+ stime <- (ptime2 - ptime1)[3] |
|
33 |
+ message(sprintf("[getNullDistribution_BSmooth.fstat] completing permutation %d in %.1f sec\n", ii, stime)) |
|
34 |
+ dmrs0 |
|
35 |
+ }, mc.cores = min(nrow(idxMatrix), mc.cores), mc.preschedule = FALSE) |
|
36 |
+ nullDist |
|
37 |
+} |
|
38 |
+ |
|
39 |
+getNullBlocks_BSmooth.tstat <- function(BSseq, idxMatrix1, idxMatrix2, estimate.var = "same", |
|
24 | 40 |
mc.cores = 1) { |
25 |
- getNullPermutation(BSseq = BSseq, idxMatrix1 = idxMatrix1, |
|
41 |
+ getNullDistribution_BSmooth.tstat(BSseq = BSseq, idxMatrix1 = idxMatrix1, |
|
26 | 42 |
idxMatrix2 = idxMatrix2, local.correct = FALSE, |
27 | 43 |
estimate.var = estimate.var, |
28 | 44 |
cutoff = c(-2,2), stat = "tstat", maxGap = 10000, |
29 | 45 |
mc.cores = mc.cores) |
30 | 46 |
} |
31 | 47 |
|
32 |
-getNullDmrs <- function(BSseq, idxMatrix1, idxMatrix2, estimate.var = "same", |
|
48 |
+getNullDmrs_BSmooth.tstat <- function(BSseq, idxMatrix1, idxMatrix2, estimate.var = "same", |
|
33 | 49 |
mc.cores = 1) { |
34 |
- getNullPermutation(BSseq = BSseq, idxMatrix1 = idxMatrix1, |
|
50 |
+ getNullDistribution_BSmooth.tstat(BSseq = BSseq, idxMatrix1 = idxMatrix1, |
|
35 | 51 |
idxMatrix2 = idxMatrix2, local.correct = TRUE, |
36 | 52 |
estimate.var = estimate.var, |
37 | 53 |
cutoff = c(-4.6,4.6), stat = "tstat.corrected", maxGap = 300, |
38 | 54 |
mc.cores = mc.cores) |
39 | 55 |
} |
40 | 56 |
|
57 |
+ |
|
58 |
+ |
|
59 |
+ |
|
41 | 60 |
subsetDmrs <- function(xx) { |
42 | 61 |
if(is.null(xx) || is(xx, "try-error")) |
43 | 62 |
return(NULL) |
... | ... |
@@ -176,17 +195,21 @@ makeIdxMatrix <- function(group1, group2, testIsSymmetric = TRUE, includeUnbalan |
176 | 195 |
} else { |
177 | 196 |
idxMatrix1 <- rbind(group1, group2, |
178 | 197 |
combineMat(subsetByMatrix(group1, combinations(6,3)), |
179 |
- subsetByMatrix(group2, combinations(6,2)))) |
|
198 |
+ subsetByMatrix(group2, combinations(6,3)))) |
|
180 | 199 |
} |
181 | 200 |
if(includeUnbalanced) { |
182 |
- newMatrix <- combineMat(subsetByMatrix(group1, combinations(6,5)), |
|
201 |
+ newMatrix1 <- combineMat(subsetByMatrix(group1, combinations(6,4)), |
|
202 |
+ subsetByMatrix(group2, combinations(6,2))) |
|
203 |
+ newMatrix2 <- combineMat(subsetByMatrix(group1, combinations(6,5)), |
|
183 | 204 |
as.matrix(group2, ncol = 1)) |
184 |
- idxMatrix1 <- rbind(idxMatrix1, newMatrix) |
|
205 |
+ idxMatrix1 <- rbind(idxMatrix1, newMatrix1, newMatrix2) |
|
185 | 206 |
} |
186 | 207 |
if(includeUnbalanced && !testIsSymmetric) { |
187 |
- newMatrix <- combineMat(as.matrix(group1, ncol = 1), |
|
188 |
- subsetByMatrix(group2, combinations(6,3))) |
|
189 |
- idxMatrix1 <- rbind(idxMatrix1, newMatrix) |
|
208 |
+ newMatrix1 <- combineMat(subsetByMatrix(group1, combinations(6,2)), |
|
209 |
+ subsetByMatrix(group2, combinations(6,4))) |
|
210 |
+ newMatrix2 <- combineMat(as.matrix(group1, ncol = 1), |
|
211 |
+ subsetByMatrix(group2, combinations(6,5))) |
|
212 |
+ idxMatrix1 <- rbind(idxMatrix1, newMatrix1, newMatrix2) |
|
190 | 213 |
} |
191 | 214 |
} |
192 | 215 |
if(is.null(idxMatrix1)) |
... | ... |
@@ -3,8 +3,16 @@ This is the developer version of Bioconductor package [bsseq](http://bioconducto |
3 | 3 |
|
4 | 4 |
```r |
5 | 5 |
source('http://bioconductor.org/biocLite.R') |
6 |
-biocLite(bsseq') |
|
6 |
+useDevel(TRUE) |
|
7 |
+biocLite('bsseq') |
|
7 | 8 |
``` |
8 | 9 |
|
9 | 10 |
## R CMD check results |
10 |
-Bioconductor: [Multiple platform build/check report](http://master.bioconductor.org/checkResults/devel/bioc-LATEST/bsseq/) |
|
11 |
+ |
|
12 |
+## Software status |
|
13 |
+ |
|
14 |
+| Resource: | Bioconductor | Travis CI | |
|
15 |
+| ------------- | ------------------- | ------------- | |
|
16 |
+| _Platforms:_ | _Multiple_ | _Linux_ | |
|
17 |
+| R CMD check | <a href="http://bioconductor.org/checkResults/release/bioc-LATEST/bsseq/"><img border="0" src="http://bioconductor.org/shields/build/release/bioc/bsseq.svg" alt="Build status"></a> (release)</br><a href="http://bioconductor.org/checkResults/devel/bioc-LATEST/bsseq/"><img border="0" src="http://bioconductor.org/shields/build/devel/bioc/bsseq.svg" alt="Build status"></a> (devel) | <a href="https://travis-ci.org/kasperdanielhansen/bsseq"><img src="https://travis-ci.org/kasperdanielhansen/bsseq.svg" alt="Build status"></a> | |
|
18 |
+| Test coverage | | <a href="https://codecov.io/github/kasperdanielhansen/bsseq?branch=master"><img src="https://codecov.io/github/kasperdanielhansen/bsseq/coverage.svg?branch=master" alt="Coverage Status"/></a> | | |
... | ... |
@@ -1,13 +1,12 @@ |
1 | 1 |
bibentry("Article", |
2 | 2 |
title = "{BSmooth: from whole genome bisulfite sequencing reads to differentially methylated regions}", |
3 |
- author = "Hansen, Kasper D. and Langmead, Benjamin and Irizarry, Rafael A.", |
|
3 |
+ author = c(person(c("Kasper", "D."), "Hansen"), |
|
4 |
+ person(c("Benjamin"), "Langmead"), |
|
5 |
+ person(c("Rafael", "A."), "Irizarry")), |
|
4 | 6 |
journal = "Genome Biology", |
5 | 7 |
year = "2012", |
6 | 8 |
volume = "13", |
9 |
+ number = "10", |
|
7 | 10 |
pages = "R83", |
8 |
- textVersion = |
|
9 |
- paste("KD Hansen, B Langmead and RA Irizarry.", |
|
10 |
- "BSmooth: from whole genome bisulfite sequencing reads to differentially methylated regions.", |
|
11 |
- "Genome Biology, 13:R83 (2012)") |
|
12 |
-) |
|
13 |
- |
|
11 |
+ doi = "10.1186/gb-2012-13-10-r83", |
|
12 |
+ pubmed = "23034175") |
... | ... |
@@ -82,3 +82,21 @@ test_makeIdxMatrix_perm5 <- function() { |
82 | 82 |
checkEquals(nrow(res), choose(10,5) / 2) |
83 | 83 |
} |
84 | 84 |
|
85 |
+test_makeIdxMatrix_perm6 <- function() { |
|
86 |
+ grp1 <- paste0("A", 1:6) |
|
87 |
+ grp2 <- paste0("B", 1:6) |
|
88 |
+ |
|
89 |
+ res <- bsseq:::makeIdxMatrix(grp1, grp2, |
|
90 |
+ testIsSymmetric = FALSE, includeUnbalanced = TRUE)[[1]] |
|
91 |
+ checkEquals(anyDuplicated(res), 0) |
|
92 |
+ checkEquals(res[1,], grp1) |
|
93 |
+ checkEquals(res[2,], grp2) |
|
94 |
+ checkEquals(nrow(res), choose(12,6)) |
|
95 |
+ |
|
96 |
+ res <- bsseq:::makeIdxMatrix(grp1, grp2, |
|
97 |
+ testIsSymmetric = TRUE, includeUnbalanced = TRUE)[[1]] |
|
98 |
+ checkEquals(anyDuplicated(res), 0) |
|
99 |
+ checkEquals(res[1,], grp1) |
|
100 |
+ checkEquals(nrow(res), choose(12,6) / 2) |
|
101 |
+} |
|
102 |
+ |
... | ... |
@@ -11,11 +11,11 @@ |
11 | 11 |
(low, high) cutoff. |
12 | 12 |
} |
13 | 13 |
\usage{ |
14 |
-dmrFinder(BSseqTstat, cutoff = NULL, qcutoff = c(0.025, 0.975), |
|
14 |
+dmrFinder(bstat, cutoff = NULL, qcutoff = c(0.025, 0.975), |
|
15 | 15 |
maxGap=300, stat = "tstat.corrected", verbose = TRUE) |
16 | 16 |
} |
17 | 17 |
\arguments{ |
18 |
- \item{BSseqTstat}{An object of class \code{BSseqTstat}.} |
|
18 |
+ \item{bstat}{An object of class \code{BSseqStat} or \code{BSseqTstat}.} |
|
19 | 19 |
\item{cutoff}{The cutoff of the t-statistics. This should be a vector |
20 | 20 |
of length two giving the (low, high) cutoff. If \code{NULL}, see |
21 | 21 |
\code{qcutoff}.} |
... | ... |
@@ -8,19 +8,26 @@ |
8 | 8 |
object. |
9 | 9 |
} |
10 | 10 |
\usage{ |
11 |
-getStats(BSseqTstat, regions = NULL, stat = "tstat.corrected") |
|
11 |
+getStats(bstat, regions = NULL, ...) |
|
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 |
- \item{BSseqTstat}{An object of class \code{BSseqTstat}.} |
|
14 |
+ \item{bstat}{An object of class \code{BSseqStat} or \code{BSseqTstat}.} |
|
15 | 15 |
\item{regions}{An optional \code{data.frame} or |
16 | 16 |
\code{GenomicRanges} object specifying a number of genomic |
17 |
- regions.} |
|
18 |
- \item{stat}{Which statistics column should be obtained.} |
|
17 |
+ regions.} |
|
18 |
+ \item{...}{Additional arguments passed to the different backends based |
|
19 |
+ on the class of \code{bstat}; see Details.} |
|
19 | 20 |
} |
20 | 21 |
\value{ |
21 | 22 |
An object of class \code{data.frame} possible restricted to the |
22 | 23 |
regions specified. |
23 | 24 |
} |
25 |
+\details{ |
|
26 |
+ Additional argument when the \code{bstat} object is of class \code{BSseqTstat}: |
|
27 |
+ \describe{ |
|
28 |
+ \item{stat}{Which statistics column should be obtained.} |
|
29 |
+ } |
|
30 |
+} |
|
24 | 31 |
\author{ |
25 | 32 |
Kasper Daniel Hansen \email{khansen@jhsph.edu} |
26 | 33 |
} |
... | ... |
@@ -1,8 +1,9 @@ |
1 | 1 |
%\VignetteIndexEntry{The bsseq user's guide} |
2 | 2 |
%\VignetteDepends{bsseq} |
3 | 3 |
%\VignettePackage{bsseq} |
4 |
+%\VignetteEngine{knitr::knitr} |
|
4 | 5 |
\documentclass[12pt]{article} |
5 |
-<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>= |
|
6 |
+<<style-knitr, eval=TRUE, echo=FALSE, results="asis">>= |
|
6 | 7 |
BiocStyle::latex() |
7 | 8 |
@ |
8 | 9 |
\newcommand{\bold}[1]{\textbf{#1}} % Fix for R bibentry |
... | ... |
@@ -17,7 +18,7 @@ BiocStyle::latex() |
17 | 18 |
|
18 | 19 |
The packages required for this vignette are |
19 | 20 |
|
20 |
-<<load,results=hide>>= |
|
21 |
+<<load, warning=FALSE, message=FALSE>>= |
|
21 | 22 |
library(bsseq) |
22 | 23 |
@ |
23 | 24 |
|
... | ... |
@@ -91,7 +92,7 @@ methylated position (DMP) referring to a single loci. |
91 | 92 |
|
92 | 93 |
If you use this package, please cite our BSmooth paper: |
93 | 94 |
|
94 |
-<<citation,echo=FALSE,results=tex>>= |
|
95 |
+<<citation,echo=FALSE,results="asis">>= |
|
95 | 96 |
print(citation("bsseq"), style = "latex") |
96 | 97 |
@ |
97 | 98 |
|
... | ... |
@@ -329,7 +330,7 @@ combine the evidence from both strands, CpGs on the ``-'' strand needs to have 1 |
329 | 330 |
their position. A full script showing how these files are used to create \Rcode{BS.chr22} can be |
330 | 331 |
found in \texttt{inst/scripts/get\_BS.chr22.R} in the \Rpackage{bsseq} package. The path of this |
331 | 332 |
file may be found by |
332 |
-<<locateScript>>= |
|
333 |
+<<locateScript,results="hide">>= |
|
333 | 334 |
system.file("scripts", "get_BS.chr22.R", package = "bsseq") |
334 | 335 |
@ |
335 | 336 |
|
... | ... |
@@ -360,15 +361,14 @@ with the bsseq package'', which also finding DMRs and plotting them. |
360 | 361 |
|
361 | 362 |
Fisher's exact tests may be efficiently computed using the function \Rcode{fisherTests}. |
362 | 363 |
|
363 |
-Binomial and poisson goodness of fit tests statistics may be computed using |
|
364 |
-\Rcode{binomialGoodnessOfFit} and \Rcode{poissonGoodnessOfFit}. |
|
364 |
+Binomial and poisson goodness of fit tests statistics may be computed using \Rcode{binomialGoodnessOfFit} and \Rcode{poissonGoodnessOfFit}. |
|
365 | 365 |
|
366 | 366 |
\bibliography{bsseq} |
367 | 367 |
|
368 | 368 |
|
369 | 369 |
\section*{SessionInfo} |
370 | 370 |
|
371 |
-<<sessionInfo,results=tex,eval=TRUE,echo=FALSE>>= |
|
371 |
+<<sessionInfo,results="asis",eval=TRUE,echo=FALSE>>= |
|
372 | 372 |
toLatex(sessionInfo()) |
373 | 373 |
@ |
374 | 374 |
|
... | ... |
@@ -2,8 +2,9 @@ |
2 | 2 |
%\VignetteDepends{bsseq} |
3 | 3 |
%\VignetteDepends{bsseqData} |
4 | 4 |
%\VignettePackage{bsseq} |
5 |
+%\VignetteEngine{knitr::knitr} |
|
5 | 6 |
\documentclass[12pt]{article} |
6 |
-<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>= |
|
7 |
+<<style-knitr, eval=TRUE, echo=FALSE, results="asis">>= |
|
7 | 8 |
BiocStyle::latex() |
8 | 9 |
@ |
9 | 10 |
\newcommand{\bold}[1]{\textbf{#1}} % Fix for R bibentry |
... | ... |
@@ -16,29 +17,18 @@ BiocStyle::latex() |
16 | 17 |
|
17 | 18 |
\section*{Introduction} |
18 | 19 |
|
19 |
-This document discusses the ins and outs of an analysis of a whole-genome shotgun bisulfite |
|
20 |
-sequencing (WGBS) dataset, using the BSmooth algorithm, which was first used in \cite{Hansen:2011} |
|
21 |
-and more formally presented and evaluated in \cite{Hansen:2012}. The intention with the document is |
|
22 |
-to focus on analysis-related tasks and questions. Basic usage of the \Rpackage{bsseq} package is |
|
23 |
-covered in ``The bsseq user's guide''. It may be useful to consult the user's guide while reading |
|
24 |
-this analysis guide. |
|
20 |
+This document discusses the ins and outs of an analysis of a whole-genome shotgun bisulfite sequencing (WGBS) dataset, using the BSmooth algorithm, which was first used in \cite{Hansen:2011} and more formally presented and evaluated in \cite{Hansen:2012}. The intention with the document is to focus on analysis-related tasks and questions. Basic usage of the \Rpackage{bsseq} package is covered in ``The bsseq user's guide''. It may be useful to consult the user's guide while reading this analysis guide. |
|
25 | 21 |
|
26 |
-In this vignette we analyze chromosome 21 and 22 from \cite{Hansen:2011}. This is primary data from |
|
27 |
-3 patients with colon cancer. For each patient we have normal colon tissue as well as cancer colon. |
|
28 |
-The samples were run on ABI SOLiD and we generated 50bp single-end reads. The reads were aligned |
|
29 |
-using the Merman aligner in the BSmooth suite (\url{http://www.rafalab.org/bsmooth}). See the |
|
30 |
-primary publication for more details \cite{Hansen:2011}. |
|
22 |
+In this vignette we analyze chromosome 21 and 22 from \cite{Hansen:2011}. This is primary data from 3 patients with colon cancer. For each patient we have normal colon tissue as well as cancer colon. The samples were run on ABI SOLiD and we generated 50bp single-end reads. The reads were aligned using the Merman aligner in the BSmooth suite (\url{http://www.rafalab.org/bsmooth}). See the primary publication for more details \cite{Hansen:2011}. |
|
31 | 23 |
|
32 | 24 |
This data is contained in the \Rpackage{bsseqData} |
33 | 25 |
|
34 |
-<<load,results=hide>>= |
|
26 |
+<<load,warnings=FALSE,messages=FALSE>>= |
|
35 | 27 |
library(bsseq) |
36 | 28 |
library(bsseqData) |
37 | 29 |
@ |
38 | 30 |
|
39 |
-The \Rpackage{bsseqData} contains a script, \texttt{inst/script/create\_BS.cancer.R}, describing how |
|
40 |
-this data is created from the Merman alignment output (also contained in the package). Note that |
|
41 |
-the current version of the BSmooth pipeline uses a slightly different alignment output format. |
|
31 |
+The \Rpackage{bsseqData} contains a script, \texttt{inst/script/create\_BS.cancer.R}, describing how this data is created from the Merman alignment output (also contained in the package). Note that the current version of the BSmooth pipeline uses a slightly different alignment output format. |
|
42 | 32 |
|
43 | 33 |
The following object contains the unsmoothed ``raw'' summarized alignment data. |
44 | 34 |
<<showData>>= |
... | ... |
@@ -49,7 +39,7 @@ pData(BS.cancer.ex) |
49 | 39 |
@ |
50 | 40 |
|
51 | 41 |
If you use this package, please cite our BSmooth paper |
52 |
-<<citation,echo=FALSE,results=tex>>= |
|
42 |
+<<citation,echo=FALSE,results="asis">>= |
|
53 | 43 |
print(citation("bsseq"), style = "latex") |
54 | 44 |
@ |
55 | 45 |
|
... | ... |
@@ -72,18 +62,9 @@ BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit) |
72 | 62 |
BS.cancer.ex.fit |
73 | 63 |
@ |
74 | 64 |
|
75 |
-This step uses parallelization where each sample is run on a separate core using \Rcode{mclapply} |
|
76 |
-from the \Rpackage{parallel} package. This form of parallelization is built into \Rpackage{bsseq}, |
|
77 |
-and (as written) requires access to a machine with 6 cores and enough RAM. The smoothing step is |
|
78 |
-being done completely independently on each sample, so if you have a lot of samples (or other |
|
79 |
-circumstances), an alternative is to split the computations manually. A later subsection shows some |
|
80 |
-example code for doing that. |
|
65 |
+This step uses parallelization where each sample is run on a separate core using \Rcode{mclapply} from the \Rpackage{parallel} package. This form of parallelization is built into \Rpackage{bsseq}, and (as written) requires access to a machine with 6 cores and enough RAM. The smoothing step is being done completely independently on each sample, so if you have a lot of samples (or other circumstances), an alternative is to split the computations manually. A later subsection shows some example code for doing that. |
|
81 | 66 |
|
82 |
-Let us discuss coverage and representation. The \Rcode{BS.cancer.ex} object contains all annotated |
|
83 |
-CpGs on human chromosome 21 and 22, whether or not there is actual data. Since we have multiple |
|
84 |
-samples, we can roughly divide the genome into 3 categories: CpGs where all samples have data, CpGs |
|
85 |
-where none of the samples have data and CpGs where some, but not all, of the samples have data. |
|
86 |
-Examining the object at hand, we get |
|
67 |
+Let us discuss coverage and representation. The \Rcode{BS.cancer.ex} object contains all annotated CpGs on human chromosome 21 and 22, whether or not there is actual data. Since we have multiple samples, we can roughly divide the genome into 3 categories: CpGs where all samples have data, CpGs where none of the samples have data and CpGs where some, but not all, of the samples have data. Examining the object at hand, we get |
|
87 | 68 |
|
88 | 69 |
<<cpgNumbers>>= |
89 | 70 |
## The average coverage of CpGs on the two chromosomes |
... | ... |
@@ -174,17 +155,14 @@ load("BS1.fit.rda") |
174 | 155 |
load("BS2.fit.rda") |
175 | 156 |
BS.fit <- combine(BS1.fit, BS2.fit) |
176 | 157 |
@ |
177 |
-This still requires that you have one node with enough RAM to hold all samples in memory. |
|
158 |
+ |
|
159 |
+This still requires that you have one node with enough RAM to hold all samples in memory. |
|
178 | 160 |
|
179 | 161 |
|
180 | 162 |
\section*{Computing t-statistics} |
181 | 163 |
|
182 |
-Before computing t-statistics, we will remove CpGs with little or no coverage. If this is not done, |
|
183 |
-you may find many DMRs in areas of the genome with very little coverage, which are most likely false |
|
184 |
-positives. It is open to personal preferences exactly which CpGs to remove, but for this analysis |
|
185 |
-we will only keep CpGs where at least 2 cancer samples and at least 2 normal samples have at least |
|
186 |
-2x in coverage. For readability, we store the coverage in a separate matrix (this is just due to |
|
187 |
-line breaks in Sweave) |
|
164 |
+Before computing t-statistics, we will remove CpGs with little or no coverage. If this is not done, you may find many DMRs in areas of the genome with very little coverage, which are most likely false positives. It is open to personal preferences exactly which CpGs to remove, but for this analysis we will only keep CpGs where at least 2 cancer samples and at least 2 normal samples have at least 2x in coverage. For readability, we store the coverage in a separate matrix (this is just due to line breaks in Sweave) |
|
165 |
+ |
|
188 | 166 |
<<keepLoci>>= |
189 | 167 |
BS.cov <- getCoverage(BS.cancer.ex.fit) |
190 | 168 |
keepLoci.ex <- which(rowSums(BS.cov[, BS.cancer.ex$Type == "cancer"] >= 2) >= 2 & |
... | ... |
@@ -192,8 +170,8 @@ keepLoci.ex <- which(rowSums(BS.cov[, BS.cancer.ex$Type == "cancer"] >= 2) >= 2 |
192 | 170 |
length(keepLoci.ex) |
193 | 171 |
BS.cancer.ex.fit <- BS.cancer.ex.fit[keepLoci.ex,] |
194 | 172 |
@ |
195 |
-(the \Rcode{keepLoci.ex} is also available for direct inspection in the \Rpackage{bsseqData} |
|
196 |
-package.) |
|
173 |
+ |
|
174 |
+(the \Rcode{keepLoci.ex} is also available for direct inspection in the \Rpackage{bsseqData} package.) |
|
197 | 175 |
|
198 | 176 |
|
199 | 177 |
We are now ready to compute t-statistics, by |
... | ... |
@@ -206,36 +184,22 @@ BS.cancer.ex.tstat <- BSmooth.tstat(BS.cancer.ex.fit, |
206 | 184 |
verbose = TRUE) |
207 | 185 |
BS.cancer.ex.tstat |
208 | 186 |
@ |
209 |
-(the \Rcode{BS.cancer.ex.tstat} is also available for direct inspection in the \Rpackage{bsseqData} |
|
210 |
-package.) |
|
211 | 187 |
|
188 |
+(the \Rcode{BS.cancer.ex.tstat} is also available for direct inspection in the \Rpackage{bsseqData} package.) |
|
212 | 189 |
|
213 | 190 |
|
214 |
-The arguments to \Rcode{BSmooth.tstat} are simple. \Rcode{group1} and \Rcode{group2} contain the |
|
215 |
-sample names of the two groups being compared (it is always group1 - group2), and indices may be |
|
216 |
-used instead of sample names. \Rcode{estimate.var} describes which samples are being used to |
|
217 |
-estimate the variability. Because this is a cancer dataset, and cancer have higher variability than |
|
218 |
-normals, we only use the normal samples to estimate the variability. Other choices of |
|
219 |
-\Rcode{estimate.var} are \Rcode{same} (assume same variability in each group) and \Rcode{paired} (do |
|
220 |
-a paired t-test). The argument \Rcode{local.correct} describes whether we should use a large-scale |
|
221 |
-(low-frequency) mean correction. This is especially important in cancer where we have found many |
|
222 |
-large-scale methylation differences between cancer and normals. |
|
191 |
+ |
|
192 |
+The arguments to \Rcode{BSmooth.tstat} are simple. \Rcode{group1} and \Rcode{group2} contain the sample names of the two groups being compared (it is always group1 - group2), and indices may be used instead of sample names. \Rcode{estimate.var} describes which samples are being used to estimate the variability. Because this is a cancer dataset, and cancer have higher variability than normals, we only use the normal samples to estimate the variability. Other choices of \Rcode{estimate.var} are \Rcode{same} (assume same variability in each group) and \Rcode{paired} (do a paired t-test). The argument \Rcode{local.correct} describes whether we should use a large-scale (low-frequency) mean correction. This is especially important in cancer where we have found many large-scale methylation differences between cancer and normals. |
|
223 | 193 |
|
224 | 194 |
We can look at the marginal distribution of the t-statistic by |
225 | 195 |
|
226 |
-\setkeys{Gin}{width=0.5\textwidth} |
|
227 |
-<<plotTstat,fig=TRUE,width=5,height=5>>= |
|
196 |
+<<plotTstat,fig.width=4,fig.height=4>>= |
|
228 | 197 |
plot(BS.cancer.ex.tstat) |
229 | 198 |
@ |
230 |
-\setkeys{Gin}{width=0.8\textwidth} |
|
231 |
- |
|
232 | 199 |
|
233 |
-The ``blocks'' of hypomethylation are clearly visible in the marginal distribution of the |
|
234 |
-uncorrected t-statistics. |
|
200 |
+The ``blocks'' of hypomethylation are clearly visible in the marginal distribution of the uncorrected t-statistics. |
|
235 | 201 |
|
236 |
-Even in comparisons where we do not observe these large-scale methylation differences, it often |
|
237 |
-improves the marginal distribution of the t-statistics to locally correct them (``improves'' in the |
|
238 |
-sense of making them more symmetric). |
|
202 |
+Even in comparisons where we do not observe these large-scale methylation differences, it often improves the marginal distribution of the t-statistics to locally correct them (``improves'' in the sense of making them more symmetric). |
|
239 | 203 |
|
240 | 204 |
\section*{Finding DMRs} |
241 | 205 |
|
... | ... |
@@ -276,20 +240,14 @@ pData(BS.cancer.ex.fit) <- pData |
276 | 240 |
|
277 | 241 |
Once this is setup, we can plot a single DMR like |
278 | 242 |
|
279 |
-\setkeys{Gin}{width=1\textwidth} |
|
280 |
-<<plotDmr,fig=TRUE,width=10,height=5>>= |
|
243 |
+<<plotDmr,fig.width=8,fig.height=4>>= |
|
281 | 244 |
plotRegion(BS.cancer.ex.fit, dmrs[1,], extend = 5000, addRegions = dmrs) |
282 | 245 |
@ |
283 |
-\setkeys{Gin}{width=0.8\textwidth} |
|
284 |
-\vspace*{-6\baselineskip} |
|
285 | 246 |
|
286 |
-\Rcode{extend} tells us how many bp to extend to either side of the plotting region. |
|
287 |
-\Rcode{addRegions} is a \Rclass{data.frame} or \Rcode{GRanges} listing additional regions that |
|
288 |
-should be highlighted. |
|
247 |
+\Rcode{extend} tells us how many bp to extend to either side of the plotting region. \Rcode{addRegions} is a \Rclass{data.frame} or \Rcode{GRanges} listing additional regions that should be highlighted. |
|
248 |
+ |
|
249 |
+Typically, we plot hundreds of DMRs in a single PDF file and use external tools to look at them. For this purpose, \Rcode{plotManyRegions} is very useful since it is much faster than plotting individual DMRs with \Rcode{plotRegion}. An example (not run) is |
|
289 | 250 |
|
290 |
-Typically, we plot hundreds of DMRs in a single PDF file and use external tools to look at them. |
|
291 |
-For this purpose, \Rcode{plotManyRegions} is very useful since it is much faster than plotting |
|
292 |
-individual DMRs with \Rcode{plotRegion}. An example (not run) is |
|
293 | 251 |
<<plotManyRegions,eval=FALSE>>= |
294 | 252 |
pdf(file = "dmrs_top200.pdf", width = 10, height = 5) |
295 | 253 |
plotManyRegions(BS.cancer.ex.fit, dmrs[1:200,], extend = 5000, |
... | ... |
@@ -331,14 +289,13 @@ space and would add complications to the internal data structures. |
331 | 289 |
|
332 | 290 |
\section*{SessionInfo} |
333 | 291 |
|
334 |
-<<sessionInfo,results=tex,eval=TRUE,echo=FALSE>>= |
|
292 |
+<<sessionInfo,results="asis",eval=TRUE,echo=FALSE>>= |
|
335 | 293 |
toLatex(sessionInfo()) |
336 | 294 |
@ |
337 | 295 |
|
338 | 296 |
\end{document} |
339 | 297 |
|
340 | 298 |
% Local Variables: |
341 |
-% eval: (add-hook 'LaTeX-mode-hook '(lambda () (if (string= (buffer-name) "bsseq_analysis.Rnw") (setq fill-column 100)))) |
|
342 | 299 |
% LocalWords: LocalWords bisulfite methylation methylated CpG CpGs DMR bsseq bp |
343 | 300 |
% LocalWords: DMRs differentially ABI SOLiD dataset WGBS BSmooth |
344 | 301 |
% LocalWords: parallelization Bioconductor hypomethylation genomic pos PDF thresholding |