From: Kasper Daniel Hansen <khansen@Tanngrisnir.local>
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@116910 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,33 +1,12 @@ |
1 | 1 |
language: r |
2 |
-sudo: required |
|
3 |
- |
|
4 |
-# Choose your operating system(s) (default: linux) |
|
5 |
-os: |
|
6 |
- - linux |
|
7 |
- - osx |
|
8 |
- |
|
9 |
-# Bioconductor-specific stuff. If targetting bioc-devel then need to use |
|
10 |
-# "bioc_use_devel: true", otherwise remove or comment-out this line. |
|
11 |
-# TODO: Should --timings be added to r_check_args to mimic BioC build machines? |
|
2 |
+r: |
|
3 |
+ - devel |
|
4 |
+sudo: false |
|
5 |
+cache: packages |
|
12 | 6 |
bioc_required: true |
13 |
-bioc_use_devel: true |
|
14 |
-# On CRAN all warnings are treated as errors; this is not true on Bioconductor. |
|
15 | 7 |
warnings_are_errors: false |
16 |
- |
|
17 |
-# Need this if also using covr to get code coverage |
|
8 |
+# Used for code coverage testing with covr and codecov |
|
18 | 9 |
r_github_packages: |
19 | 10 |
- jimhester/covr |
20 | 11 |
after_success: |
21 | 12 |
- Rscript -e 'covr::codecov()' |
22 |
- |
|
23 |
-# bsseq-specific stuff |
|
24 |
-r_build_args: "--no-build-vignettes --no-manual" |
|
25 |
-# TODO: Travis is reporting an error when checking the vignette, hence the |
|
26 |
-# inclusion of --no-vignettes in r_check_args |
|
27 |
-r_check_args: "--no-vignettes" |
|
28 |
-# Must 'manually' install packages in SUGGESTS in order to run full tests |
|
29 |
-r_binary_packages: |
|
30 |
- - RUnit |
|
31 |
-bioc_packages: |
|
32 |
- - bsseqData |
|
33 |
- - BiocStyle |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: bsseq |
2 |
-Version: 1.7.7 |
|
2 |
+Version: 1.7.13 |
|
3 | 3 |
Title: Analyze, manage and store bisulfite sequencing data |
4 | 4 |
Description: A collection of tools for analyzing and visualizing bisulfite |
5 | 5 |
sequencing data. |
... | ... |
@@ -7,7 +7,7 @@ Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("aut", "cre"), |
7 | 7 |
email = "kasperdanielhansen@gmail.com"), |
8 | 8 |
person("Peter", "Hickey", role = "ctb", email = "peter.hickey@gmail.com")) |
9 | 9 |
Depends: |
10 |
- R (>= 2.15), |
|
10 |
+ R (>= 3.3), |
|
11 | 11 |
methods, |
12 | 12 |
BiocGenerics, |
13 | 13 |
GenomicRanges (>= 1.23.7), |
... | ... |
@@ -26,7 +26,7 @@ Imports: |
26 | 26 |
data.table, |
27 | 27 |
S4Vectors, |
28 | 28 |
R.utils (>= 2.0.0), |
29 |
- matrixStats, |
|
29 |
+ matrixStats (>= 0.50.0), |
|
30 | 30 |
permute |
31 | 31 |
Suggests: |
32 | 32 |
RUnit, |
... | ... |
@@ -68,7 +68,7 @@ export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats", |
68 | 68 |
"read.umtab", "read.umtab2", "read.bsmooth", "read.bismark", |
69 | 69 |
"poissonGoodnessOfFit", "binomialGoodnessOfFit", |
70 | 70 |
"data.frame2GRanges", "BSseqTstat", |
71 |
- "BSseqStat", "BSmooth.fstat", "smoothSds", "computeStat") |
|
71 |
+ "BSseqStat") |
|
72 | 72 |
|
73 | 73 |
S3method("print", "chisqGoodnessOfFit") |
74 | 74 |
S3method("plot", "chisqGoodnessOfFit") |
... | ... |
@@ -1,10 +1,10 @@ |
1 |
-BSmooth.fstat <- function(BSseq, design, contrasts, returnModelCoefficients = FALSE, verbose = TRUE){ |
|
1 |
+BSmooth.fstat <- function(BSseq, design, contrasts, verbose = TRUE){ |
|
2 | 2 |
stopifnot(is(BSseq, "BSseq")) |
3 | 3 |
stopifnot(hasBeenSmoothed(BSseq)) |
4 |
- |
|
4 |
+ |
|
5 | 5 |
## if(any(rowSums(getCoverage(BSseq)[, unlist(groups)]) == 0)) |
6 | 6 |
## warning("Computing t-statistics at locations where there is no data; consider subsetting the 'BSseq' object first") |
7 |
- |
|
7 |
+ |
|
8 | 8 |
if(verbose) cat("[BSmooth.fstat] fitting linear models ... ") |
9 | 9 |
ptime1 <- proc.time() |
10 | 10 |
allPs <- getMeth(BSseq, type = "smooth", what = "perBase", |
... | ... |
@@ -30,9 +30,6 @@ BSmooth.fstat <- function(BSseq, design, contrasts, returnModelCoefficients = FA |
30 | 30 |
stats <- list(rawSds = rawSds, |
31 | 31 |
cor.coefficients = cor.coefficients, |
32 | 32 |
rawTstats = rawTstats) |
33 |
- if(returnModelCoefficients) { |
|
34 |
- stats$modelCoefficients <- fit$coefficients |
|
35 |
- } |
|
36 | 33 |
out <- BSseqStat(gr = granges(BSseq), |
37 | 34 |
stats = stats, parameters = parameters) |
38 | 35 |
out |
... | ... |
@@ -69,7 +66,8 @@ smoothSds <- function(BSseqStat, k = 101, qSd = 0.75, mc.cores = 1, |
69 | 66 |
BSseqStat |
70 | 67 |
} |
71 | 68 |
|
72 |
- |
|
69 |
+# Quieten R CMD check |
|
70 |
+globalVariables("tstat") |
|
73 | 71 |
computeStat <- function(BSseqStat, coef = NULL) { |
74 | 72 |
stopifnot(is(BSseqStat, "BSseqStat")) |
75 | 73 |
if(is.null(coef)) { |
... | ... |
@@ -140,11 +138,10 @@ fstat.pipeline <- function(BSseq, design, contrasts, cutoff, fac, nperm = 1000, |
140 | 138 |
coef = NULL, maxGap.sd = 10 ^ 8, maxGap.dmr = 300, |
141 | 139 |
mc.cores = 1) { |
142 | 140 |
bstat <- BSmooth.fstat(BSseq = BSseq, design = design, |
143 |
- contrasts = contrasts, |
|
144 |
- returnModelCoefficients = TRUE) |
|
141 |
+ contrasts = contrasts) |
|
145 | 142 |
bstat <- smoothSds(bstat) |
146 | 143 |
bstat <- computeStat(bstat, coef = coef) |
147 |
- dmrs <- dmrFinder(bstat, cutoff = cutoff) |
|
144 |
+ dmrs <- dmrFinder(bstat, cutoff = cutoff, maxGap = maxGap.dmr) |
|
148 | 145 |
idxMatrix <- permuteAll(nperm, design) |
149 | 146 |
nullDist <- getNullDistribution_BSmooth.fstat(BSseq = BSseq, |
150 | 147 |
idxMatrix = idxMatrix, |
... | ... |
@@ -164,3 +161,38 @@ fstat.pipeline <- function(BSseq, design, contrasts, cutoff, fac, nperm = 1000, |
164 | 161 |
list(bstat = bstat, dmrs = dmrs, idxMatrix = idxMatrix, nullDist = nullDist) |
165 | 162 |
} |
166 | 163 |
|
164 |
+fstat.comparisons.pipeline <- function(BSseq, design, contrasts, cutoff, fac, |
|
165 |
+ maxGap.sd = 10 ^ 8, maxGap.dmr = 300, |
|
166 |
+ verbose = TRUE) { |
|
167 |
+ bstat <- BSmooth.fstat(BSseq = BSseq, |
|
168 |
+ design = design, |
|
169 |
+ contrasts = contrasts, |
|
170 |
+ verbose = verbose) |
|
171 |
+ bstat <- smoothSds(bstat, maxGap = maxGap.sd, verbose = verbose) |
|
172 |
+ # NOTE: Want to keep the bstat object corresponding to the original fstat |
|
173 |
+ # and not that from the last t-tsts in the following lapply() |
|
174 |
+ bstat_f <- bstat |
|
175 |
+ if (verbose) { |
|
176 |
+ cat(paste0("[fstat.comparisons.pipeline] making ", ncol(contrasts), |
|
177 |
+ " comparisons ... \n")) |
|
178 |
+ } |
|
179 |
+ l <- lapply(seq_len(ncol(contrasts)), function(coef) { |
|
180 |
+ if (verbose) { |
|
181 |
+ cat(paste0("[fstat.comparisons.pipeline] ", |
|
182 |
+ colnames(contrasts)[coef], "\n")) |
|
183 |
+ } |
|
184 |
+ bstat <- computeStat(bstat, coef = coef) |
|
185 |
+ dmrs <- dmrFinder(bstat, cutoff = cutoff, maxGap = maxGap.dmr, |
|
186 |
+ verbose = verbose) |
|
187 |
+ if (!is.null(dmrs)) { |
|
188 |
+ meth <- getMeth(BSseq, dmrs, what = "perRegion") |
|
189 |
+ meth <- t(apply(meth, 1, function(xx) tapply(xx, fac, mean))) |
|
190 |
+ dmrs <- cbind(dmrs, meth) |
|
191 |
+ dmrs$maxDiff <- rowMaxs(meth) - rowMins(meth) |
|
192 |
+ } |
|
193 |
+ dmrs |
|
194 |
+ }) |
|
195 |
+ names(l) <- colnames(contrasts) |
|
196 |
+ list(bstat = bstat, dmrs = l) |
|
197 |
+} |
|
198 |
+ |
... | ... |
@@ -78,7 +78,7 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire |
78 | 78 |
ptime1 <- proc.time() |
79 | 79 |
switch(estimate.var, |
80 | 80 |
"group2" = { |
81 |
- rawSds <- rowSds(allPs[, group2, drop = FALSE], na.rm = TRUE) |
|
81 |
+ rawSds <- rowSds(allPs, cols = group2, na.rm = TRUE) |
|
82 | 82 |
smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) { |
83 | 83 |
smoothSd(rawSds[idx], k = k) |
84 | 84 |
}, mc.cores = mc.cores)) |
... | ... |
@@ -86,8 +86,8 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire |
86 | 86 |
tstat.sd <- smoothSds * scale |
87 | 87 |
}, |
88 | 88 |
"same" = { |
89 |
- rawSds <- sqrt( ((length(group1) - 1) * rowVars(allPs[, group1, drop = FALSE]) + |
|
90 |
- (length(group2) - 1) * rowVars(allPs[, group2, drop = FALSE])) / |
|
89 |
+ rawSds <- sqrt( ((length(group1) - 1) * rowVars(allPs, cols = group1) + |
|
90 |
+ (length(group2) - 1) * rowVars(allPs, cols = group2)) / |
|
91 | 91 |
(length(group1) + length(group2) - 2)) |
92 | 92 |
smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) { |
93 | 93 |
smoothSd(rawSds[idx], k = k) |
... | ... |
@@ -3,6 +3,7 @@ read.bismark <- function(files, |
3 | 3 |
rmZeroCov = FALSE, |
4 | 4 |
strandCollapse = TRUE, |
5 | 5 |
fileType = c("cov", "oldBedGraph", "cytosineReport"), |
6 |
+ mc.cores = 1, |
|
6 | 7 |
verbose = TRUE) { |
7 | 8 |
## Argument checking |
8 | 9 |
if (anyDuplicated(files)) { |
... | ... |
@@ -15,10 +16,18 @@ read.bismark <- function(files, |
15 | 16 |
if (verbose) { |
16 | 17 |
message(paste0("Assuming file type is ", fileType)) |
17 | 18 |
} |
19 |
+ if (strandCollapse && (fileType == "cov" || fileType == "oldBedGraph")) { |
|
20 |
+ stop("'strandCollapse' must be 'FALSE' if 'fileType' is '", fileType, "'") |
|
21 |
+ } |
|
22 |
+ ## SummarizedExperiment validator makes calls to identical() that will fail |
|
23 |
+ ## due to how sampleNames are propagated sometimes with and without names(). |
|
24 |
+ ## To make life simpler, we simply strip the names() |
|
25 |
+ sampleNames <- unname(sampleNames) |
|
26 |
+ |
|
18 | 27 |
## Process each file |
19 | 28 |
idxes <- seq_along(files) |
20 | 29 |
names(idxes) <- sampleNames |
21 |
- allOut <- lapply(idxes, function(ii) { |
|
30 |
+ allOut <- mclapply(idxes, function(ii) { |
|
22 | 31 |
if (verbose) { |
23 | 32 |
cat(sprintf("[read.bismark] Reading file '%s' ... ", files[ii])) |
24 | 33 |
} |
... | ... |
@@ -33,7 +42,7 @@ read.bismark <- function(files, |
33 | 42 |
rmZeroCov = rmZeroCov, |
34 | 43 |
keepContext = FALSE) |
35 | 44 |
} |
36 |
- if (strandCollapse && !all(runValue(strand(out)) == "*")) { |
|
45 |
+ if (strandCollapse) { |
|
37 | 46 |
out <- strandCollapse(out) |
38 | 47 |
} |
39 | 48 |
ptime2 <- proc.time() |
... | ... |
@@ -42,7 +51,7 @@ read.bismark <- function(files, |
42 | 51 |
cat(sprintf("done in %.1f secs\n", stime)) |
43 | 52 |
} |
44 | 53 |
out |
45 |
- }) |
|
54 |
+ }, mc.cores = mc.cores) |
|
46 | 55 |
|
47 | 56 |
if (verbose) { |
48 | 57 |
cat(sprintf("[read.bismark] Joining samples ... ")) |
... | ... |
@@ -121,5 +130,6 @@ read.bismarkCytosineReportRaw <- function(thisfile, |
121 | 130 |
## Create BSseq instance from 'out' |
122 | 131 |
BSseq(gr = gr, sampleNames = thisSampleName, |
123 | 132 |
M = as.matrix(out[[4L]]), |
124 |
- Cov = as.matrix(out[[4L]] + out[[5L]])) |
|
133 |
+ Cov = as.matrix(out[[4L]] + out[[5L]]), |
|
134 |
+ rmZeroCov = rmZeroCov) |
|
125 | 135 |
} |
... | ... |
@@ -9,6 +9,15 @@ test_read.bismark_coverage <- function() { |
9 | 9 |
fileType = "cov", |
10 | 10 |
verbose = FALSE) |
11 | 11 |
checkTrue(is(bsseq, "BSseq")) |
12 |
+ # Regression check |
|
13 |
+ # Should not fail if sampleNames have names() |
|
14 |
+ bsseq <- read.bismark(files = infile, |
|
15 |
+ sampleNames = c(test = "test_data"), |
|
16 |
+ rmZeroCov = FALSE, |
|
17 |
+ strandCollapse = FALSE, |
|
18 |
+ fileType = "cov", |
|
19 |
+ verbose = FALSE) |
|
20 |
+ checkTrue(is(bsseq, "BSseq")) |
|
12 | 21 |
|
13 | 22 |
# Should also work because the "cov" fileType is the same as the |
14 | 23 |
# "oldBedGraph" fileType |
15 | 24 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,62 @@ |
1 |
+\name{BSmooth.fstat} |
|
2 |
+\alias{BSmooth.fstat} |
|
3 |
+\title{ |
|
4 |
+ Compute F-statistics based on smoothed whole-genome bisulfite |
|
5 |
+ sequencing data. |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+ Compute F-statistics based on smoothed whole-genome bisulfite |
|
9 |
+ sequencing data. |
|
10 |
+} |
|
11 |
+\usage{ |
|
12 |
+BSmooth.fstat(BSseq, design, contrasts, verbose = TRUE) |
|
13 |
+} |
|
14 |
+\arguments{ |
|
15 |
+ \item{BSseq}{An object of class \code{BSseq}.} |
|
16 |
+ \item{design}{The design matrix of the bisulfite-sequencing experiment, |
|
17 |
+ with rows corresponding to arrays and columns to coefficients to be |
|
18 |
+ estimated.} |
|
19 |
+ \item{contrasts}{Numeric matrix with rows corresponding to columns in |
|
20 |
+ \code{design} and columns containing contrasts. May be a vector if |
|
21 |
+ there is only one contrast.} |
|
22 |
+ \item{verbose}{Should the function be verbose?} |
|
23 |
+} |
|
24 |
+\details{ |
|
25 |
+ \strong{TODO} |
|
26 |
+} |
|
27 |
+\value{ |
|
28 |
+ An object of class \linkS4class{BSseqStat}. |
|
29 |
+} |
|
30 |
+\author{ |
|
31 |
+ Kasper Daniel Hansen \email{khansen@jhsph.edu} |
|
32 |
+} |
|
33 |
+\seealso{ |
|
34 |
+ \code{\link{BSmooth}} for the input object and |
|
35 |
+ \linkS4class{BSseq} for its class. |
|
36 |
+ \linkS4class{BSseqTstat} describes the return class. This |
|
37 |
+ function is likely to be followed by the use of |
|
38 |
+ \code{\link{dmrFinder}}. |
|
39 |
+} |
|
40 |
+\examples{ |
|
41 |
+ \dontrun{ |
|
42 |
+ if(require(bsseqData)) { |
|
43 |
+ data(keepLoci.ex) |
|
44 |
+ data(BS.cancer.ex.fit) |
|
45 |
+ BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit) |
|
46 |
+ ## Remember to subset the BSseq object, see vignette for explanation |
|
47 |
+ ## TODO: Kind of a forced example |
|
48 |
+ design <- model.matrix(~0 + BS.cancer.ex.fit$Type) |
|
49 |
+ colnames(design) <- gsub("BS\\\.cancer\\\.ex\\\.fit\\\$Type", "", |
|
50 |
+ colnames(design)) |
|
51 |
+ contrasts <- makeContrasts( |
|
52 |
+ cancer_vs_normal = cancer - normal, |
|
53 |
+ levels = design |
|
54 |
+ ) |
|
55 |
+ BS.stat <- BSmooth.fstat(BS.cancer.ex.fit[keepLoci.ex,], |
|
56 |
+ design, |
|
57 |
+ contrasts) |
|
58 |
+ BS.stat |
|
59 |
+ } |
|
60 |
+ } |
|
61 |
+} |
|
62 |
+\keyword{internal} |
0 | 63 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,64 @@ |
1 |
+\name{BSseqStat-class} |
|
2 |
+\Rdversion{1.1} |
|
3 |
+\docType{class} |
|
4 |
+\alias{BSseqStat} |
|
5 |
+\alias{BSseqStat-class} |
|
6 |
+\alias{[,BSseqStat-method} |
|
7 |
+\alias{[,BSseqStat,ANY-method} |
|
8 |
+\alias{show,BSseqStat-method} |
|
9 |
+\title{Class BSseqStat} |
|
10 |
+\description{ |
|
11 |
+ A class for representing statistics for smoothed whole-genome |
|
12 |
+ bisulfite sequencing data. |
|
13 |
+} |
|
14 |
+\usage{ |
|
15 |
+ BSseqStat(gr = NULL, stats = NULL, parameters = NULL) |
|
16 |
+} |
|
17 |
+\arguments{ |
|
18 |
+ \item{gr}{The genomic locations as an object of class \code{GRanges}.} |
|
19 |
+ \item{stats}{The statistics, as a matrix.} |
|
20 |
+ \item{parameters}{A list of parameters.} |
|
21 |
+} |
|
22 |
+\section{Objects from the Class}{ |
|
23 |
+ Objects can be created by calls of the form \code{BSseqStat(...)}. |
|
24 |
+ However, usually objects are returned by \code{BSmooth.fstat(...)} and |
|
25 |
+ not constructed by the user. |
|
26 |
+} |
|
27 |
+\section{Slots}{ |
|
28 |
+ \describe{ |
|
29 |
+ \item{\code{stats}:}{This is a matrix with columns representing |
|
30 |
+ various statistics for methylation loci along the genome.} |
|
31 |
+ \item{\code{parameters}:}{Object of class \code{list}. A list of |
|
32 |
+ parameters representing how the statistics were computed.} |
|
33 |
+ \item{\code{gr}:}{Object of class \code{GRanges} giving genomic |
|
34 |
+ locations.} |
|
35 |
+ } |
|
36 |
+} |
|
37 |
+\section{Extends}{ |
|
38 |
+ Class \code{\linkS4class{hasGRanges}}, directly. |
|
39 |
+} |
|
40 |
+\section{Methods}{ |
|
41 |
+ \describe{ |
|
42 |
+ \item{[}{The subsetting operator; one may only subset in one |
|
43 |
+ dimension, corresponding to methylation loci.} |
|
44 |
+ \item{show}{The show method.} |
|
45 |
+ } |
|
46 |
+} |
|
47 |
+\section{Utilities}{ |
|
48 |
+ This class extends \code{hasGRanges} and therefore inherits a number |
|
49 |
+ of useful \code{GRanges} methods that operate on the \code{gr} slot, |
|
50 |
+ used for accessing and setting the genomic locations and also do |
|
51 |
+ \code{subsetByOverlaps}. |
|
52 |
+} |
|
53 |
+\author{ |
|
54 |
+ Kasper Daniel Hansen \email{khansen@jhsph.edu} |
|
55 |
+} |
|
56 |
+\seealso{ |
|
57 |
+ \code{\linkS4class{hasGRanges}} for accessing the genomic locations. |
|
58 |
+ \code{\link{BSmooth.fstat}} for a function |
|
59 |
+ that returns objects of class \code{BSseqStat}, and \code{\link{smoothSds}}, |
|
60 |
+ \code{\link{computeStat}} and \code{\link{dmrFinder}} |
|
61 |
+ for functions that operate based on these statistics. Also see |
|
62 |
+ the more specialised \code{\linkS4class{BSseqTstat}}. |
|
63 |
+} |
|
64 |
+\keyword{classes} |
... | ... |
@@ -31,7 +31,7 @@ not constructed by the user.. |
31 | 31 |
\item{\code{parameters}:}{Object of class \code{list}. A list of |
32 | 32 |
parameters representing how the t-statistics were computed.} |
33 | 33 |
\item{\code{gr}:}{Object of class \code{GRanges} giving genomic |
34 |
- locations.} |
|
34 |
+ locations.} |
|
35 | 35 |
} |
36 | 36 |
} |
37 | 37 |
\section{Extends}{ |
... | ... |
@@ -48,7 +48,7 @@ Class \code{\linkS4class{hasGRanges}}, directly. |
48 | 48 |
This class extends \code{hasGRanges} and therefore inherits a number |
49 | 49 |
of useful \code{GRanges} methods that operate on the \code{gr} slot, |
50 | 50 |
used for accessing and setting the genomic locations and also do |
51 |
- \code{subsetByOverlaps}. |
|
51 |
+ \code{subsetByOverlaps}. |
|
52 | 52 |
} |
53 | 53 |
\author{ |
54 | 54 |
Kasper Daniel Hansen \email{khansen@jhsph.edu} |
... | ... |
@@ -56,7 +56,7 @@ Class \code{\linkS4class{hasGRanges}}, directly. |
56 | 56 |
\seealso{ |
57 | 57 |
The package vignette(s). \code{\linkS4class{hasGRanges}} for accessing |
58 | 58 |
the genomic locations. \code{\link{BSmooth.tstat}} for a function |
59 |
- that objects of class \code{BSseqTstat}, and \code{\link{dmrFinder}} |
|
59 |
+ that returns objects of class \code{BSseqTstat}, and \code{\link{dmrFinder}} |
|
60 | 60 |
for a function that computes DMRs based on the t-statistics. Also see |
61 | 61 |
\code{\link[bsseqData]{BS.cancer.ex.tstat}} for an example of the |
62 | 62 |
class in the \pkg{bsseqData} package. |
63 | 63 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,66 @@ |
1 |
+\name{computeStat} |
|
2 |
+\alias{computeStat} |
|
3 |
+\title{ |
|
4 |
+ Compute a test statistic based on smoothed whole-genome bisulfite |
|
5 |
+ sequencing data. |
|
6 |
+ } |
|
7 |
+\description{ |
|
8 |
+ Compute a test statistic based on smoothed whole-genome bisulfite |
|
9 |
+ sequencing data. |
|
10 |
+ } |
|
11 |
+\usage{ |
|
12 |
+ computeStat(BSseqStat, coef = NULL) |
|
13 |
+} |
|
14 |
+ |
|
15 |
+\arguments{ |
|
16 |
+ \item{BSseqStat}{An object of class \code{BSseqStat}, typically an object |
|
17 |
+ returned by \code{\link{smoothSds}(...)} and not constructed by |
|
18 |
+ the user.} |
|
19 |
+ \item{coef}{A vector indicating for which coefficients the statistic is to |
|
20 |
+ be computed (\code{coef = NULL} corresponds to testing all |
|
21 |
+ coefficients). If the length of the \code{coef} is 1 then the |
|
22 |
+ corresponding t-statistic is computed, otherwise the corresponding |
|
23 |
+ F-statistic is computed.} |
|
24 |
+} |
|
25 |
+\details{ |
|
26 |
+ \strong{TODO} |
|
27 |
+} |
|
28 |
+\value{ |
|
29 |
+ An object of class \linkS4class{BSseqStat}. More speciically, the input |
|
30 |
+ \linkS4class{BSseqStat} object with the computed statistics added to the |
|
31 |
+ \code{stats} slot (accessible with \code{\link{getStats}}). |
|
32 |
+} |
|
33 |
+\author{ |
|
34 |
+ Kasper Daniel Hansen \email{khansen@jhsph.edu} |
|
35 |
+} |
|
36 |
+ |
|
37 |
+\seealso{ |
|
38 |
+ \code{\link{smoothSds}} for the function to create the appropriate |
|
39 |
+ \code{\linkS4class{BSseqStat}} input object. |
|
40 |
+ \code{\linkS4class{BSseqStat}} also describes the return class. This |
|
41 |
+ function is likely to be followed by the use of \code{\link{dmrFinder}}.} |
|
42 |
+\examples{ |
|
43 |
+ \dontrun{ |
|
44 |
+ if(require(bsseqData)) { |
|
45 |
+ data(keepLoci.ex) |
|
46 |
+ data(BS.cancer.ex.fit) |
|
47 |
+ BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit) |
|
48 |
+ ## Remember to subset the BSseq object, see vignette for explanation |
|
49 |
+ ## TODO: Kind of a forced example |
|
50 |
+ design <- model.matrix(~0 + BS.cancer.ex.fit$Type) |
|
51 |
+ colnames(design) <- gsub("BS\\\.cancer\\\.ex\\\.fit\\\$Type", "", |
|
52 |
+ colnames(design)) |
|
53 |
+ contrasts <- makeContrasts( |
|
54 |
+ cancer_vs_normal = cancer - normal, |
|
55 |
+ levels = design |
|
56 |
+ ) |
|
57 |
+ BS.stat <- BSmooth.fstat(BS.cancer.ex.fit[keepLoci.ex,], |
|
58 |
+ design, |
|
59 |
+ contrasts) |
|
60 |
+ BS.stat <- smoothSds(BS.stat) |
|
61 |
+ BS.stat <- computeStat(BS.stat) |
|
62 |
+ BS.stat |
|
63 |
+ } |
|
64 |
+ } |
|
65 |
+} |
|
66 |
+\keyword{internal} |
... | ... |
@@ -11,6 +11,7 @@ |
11 | 11 |
rmZeroCov = FALSE, |
12 | 12 |
strandCollapse = TRUE, |
13 | 13 |
fileType = c("cov", "oldBedGraph", "cytosineReport"), |
14 |
+ mc.cores = 1, |
|
14 | 15 |
verbose = TRUE) |
15 | 16 |
} |
16 | 17 |
\arguments{ |
... | ... |
@@ -22,8 +23,13 @@ |
22 | 23 |
all samples be removed. This will result in a much smaller object if |
23 | 24 |
the data originates from (targeted) capture bisulfite sequencing.} |
24 | 25 |
\item{strandCollapse}{Should strand-symmetric methylation loci, e.g., CpGs, |
25 |
- be collapsed across strands.} |
|
26 |
+ be collapsed across strands. This option is only available if |
|
27 |
+ \code{fileType = "cytosineReport"} since the other file types do not |
|
28 |
+ contain the necessary strand information.} |
|
26 | 29 |
\item{fileType}{The format of the input file; see Note for details.} |
30 |
+ \item{mc.cores}{The number of cores used. Note that setting |
|
31 |
+ \code{mc.cores} to a value greater than 1 is not supported on MS |
|
32 |
+ Windows, see the help page for \code{mclapply}.} |
|
27 | 33 |
\item{verbose}{Make the function verbose.} |
28 | 34 |
} |
29 | 35 |
\note{ |
... | ... |
@@ -56,7 +62,7 @@ |
56 | 62 |
There is no standard file extension for this file. The "\code{C-context}" and |
57 | 63 |
"\code{trinucleotide context}" columns are not currently used by bsseq. |
58 | 64 |
|
59 |
- The following is an incomplete list of issues to be aware of when using |
|
65 |
+ The following is a list of some issues to be aware of when using |
|
60 | 66 |
output from Bismark's methylation extractor: |
61 | 67 |
|
62 | 68 |
\itemize{ |
... | ... |
@@ -75,7 +81,9 @@ |
75 | 81 |
Furthermore, the default co-ordinate system varies by version of Bismark. |
76 | 82 |
bsseq makes no assumptions about the basis of the genomic co-ordinates and |
77 | 83 |
it is left to the user to ensure that the appropriate basis is used in the |
78 |
- analysis of their data. |
|
84 |
+ analysis of their data. Since Bioconductor packages and |
|
85 |
+ \code{\linkS4class{GRanges}} use one-based co-ordinates, it |
|
86 |
+ is recommended that your Bismark files are also one-based. |
|
79 | 87 |
} |
80 | 88 |
} |
81 | 89 |
\value{ |
... | ... |
@@ -96,7 +104,7 @@ |
96 | 104 |
bismarkBSseq <- read.bismark(files = infile, |
97 | 105 |
sampleNames = "test_data", |
98 | 106 |
rmZeroCov = FALSE, |
99 |
- strandCollapse = TRUE, |
|
107 |
+ strandCollapse = FALSE, |
|
100 | 108 |
fileType = "cov", |
101 | 109 |
verbose = TRUE) |
102 | 110 |
bismarkBSseq |
103 | 111 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,73 @@ |
1 |
+\name{smoothSds} |
|
2 |
+\alias{smoothSds} |
|
3 |
+\title{ |
|
4 |
+ Smooth the standard deviations using a thresholded running mean based on |
|
5 |
+ smoothed whole-genome bisulfite sequencing data. |
|
6 |
+} |
|
7 |
+\description{ |
|
8 |
+ Smooth the standard deviations using a thresholded running mean based on |
|
9 |
+ smoothed whole-genome bisulfite sequencing data. |
|
10 |
+} |
|
11 |
+\usage{ |
|
12 |
+ smoothSds(BSseqStat, k = 101, qSd = 0.75, mc.cores = 1, maxGap = 10^8, |
|
13 |
+ verbose = TRUE) |
|
14 |
+} |
|
15 |
+ |
|
16 |
+\arguments{ |
|
17 |
+ \item{BSseqStat}{An object of class \code{BSseqStat}, typically an object |
|
18 |
+ returned by \code{\link{BSmooth.fstat}(...)} and not constructed by |
|
19 |
+ the user.} |
|
20 |
+ \item{k}{A positive scalar, see details.} |
|
21 |
+ \item{qSd}{A scalar between 0 and 1, see details.} |
|
22 |
+ \item{mc.cores}{The number of cores used. Note that setting |
|
23 |
+ \code{mc.cores} to a value greater than 1 is not supported on MS |
|
24 |
+ Windows, see the help page for \code{mclapply}.} |
|
25 |
+ \item{maxGap}{A scalar greater than 0, see details.} |
|
26 |
+ \item{verbose}{Should the function be verbose?} |
|
27 |
+} |
|
28 |
+\details{ |
|
29 |
+ The standard deviation estimates are smoothed using a running mean with a |
|
30 |
+ width of \code{k} and thresholded using \code{qSd} which sets the minimum |
|
31 |
+ standard deviation to be the \code{qSd}-quantile. |
|
32 |
+} |
|
33 |
+\value{ |
|
34 |
+ An object of class \linkS4class{BSseqStat}. More speciically, the input |
|
35 |
+ \linkS4class{BSseqStat} object with the computed statistics added to the |
|
36 |
+ \code{stats} slot (accessible with \code{\link{getStats}}). |
|
37 |
+} |
|
38 |
+\author{ |
|
39 |
+ Kasper Daniel Hansen \email{khansen@jhsph.edu} |
|
40 |
+} |
|
41 |
+ |
|
42 |
+\seealso{ |
|
43 |
+ \code{\link{BSmooth.fstat}} for the function to create the appropriate |
|
44 |
+ \code{\linkS4class{BSseqStat}} input object. |
|
45 |
+ \code{\linkS4class{BSseqStat}} also describes the return class. This |
|
46 |
+ function is likely to be followed by the use of \code{\link{computeStat}}.} |
|
47 |
+\examples{ |
|
48 |
+ \dontrun{ |
|
49 |
+ if(require(bsseqData)) { |
|
50 |
+ data(keepLoci.ex) |
|
51 |
+ data(BS.cancer.ex.fit) |
|
52 |
+ BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit) |
|
53 |
+ ## Remember to subset the BSseq object, see vignette for explanation |
|
54 |
+ ## TODO: Kind of a forced example |
|
55 |
+ design <- model.matrix(~0 + BS.cancer.ex.fit$Type) |
|
56 |
+ colnames(design) <- gsub("BS\\\.cancer\\\.ex\\\.fit\\\$Type", "", |
|
57 |
+ colnames(design)) |
|
58 |
+ contrasts <- makeContrasts( |
|
59 |
+ cancer_vs_normal = cancer - normal, |
|
60 |
+ levels = design |
|
61 |
+ ) |
|
62 |
+ BS.stat <- BSmooth.fstat(BS.cancer.ex.fit[keepLoci.ex,], |
|
63 |
+ design, |
|
64 |
+ contrasts) |
|
65 |
+ BS.stat <- smoothSds(BS.stat) |
|
66 |
+ ## Comparing the raw standard deviations to the smoothed standard |
|
67 |
+ ## deviations |
|
68 |
+ summary(getStats(BS.stat, what = "rawSds")) |
|
69 |
+ summary(getStats(BS.stat, what = "smoothSds")) |
|
70 |
+ } |
|
71 |
+ } |
|
72 |
+} |
|
73 |
+\keyword{internal} |
... | ... |
@@ -158,7 +158,7 @@ We also allow for easy computation of Fisher's exact test for each loci, using t |
158 | 158 |
|
159 | 159 |
Objects of class \Rcode{BSseq} contains a \Rcode{GRanges} object with the genomic locations. This |
160 | 160 |
\Rcode{GRanges} object can be obtained by \Rcode{granges}. A number of standard \Rcode{GRanges} |
161 |
-methods works directly on the \R.code{BSseq} object, such as \Rcode{start}, \Rcode{end}, |
|
161 |
+methods works directly on the \Rcode{BSseq} object, such as \Rcode{start}, \Rcode{end}, |
|
162 | 162 |
\Rcode{seqnames} (chromosome names) etc. |
163 | 163 |
|
164 | 164 |
These objects also contains a \Rcode{phenoData} object for sample pheno data. Useful methods are |
... | ... |
@@ -201,7 +201,7 @@ Even in comparisons where we do not observe these large-scale methylation differ |
201 | 201 |
|
202 | 202 |
Once t-statistics have been computed, we can compute differentially methylated regions (DMRs) by |
203 | 203 |
thresholding the t-statistics. Here we use a cutoff of $4.6$, which was chosen by looking at the |
204 |
-quantiles of the t-statistics (for the entire genome). |
|
204 |
+quantiles of the t-statistics (for the entire genome)\footnote{See \url{https://support.bioconductor.org/p/78227/} for further discussion on the choice of cutoff. }. |
|
205 | 205 |
<<dmrs>>= |
206 | 206 |
dmrs0 <- dmrFinder(BS.cancer.ex.tstat, cutoff = c(-4.6, 4.6)) |
207 | 207 |
dmrs <- subset(dmrs0, n >= 3 & abs(meanDiff) >= 0.1) |