Browse code

updates from master

From: Kasper Daniel Hansen <khansen@Tanngrisnir.local>

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@116910 bc3139a8-67e5-0310-9ffc-ced21a209358

khansen authored on 29/04/2016 04:18:31
Showing 15 changed files

... ...
@@ -1,33 +1,12 @@
1 1
 language: r
2
-sudo: required
3
-
4
-# Choose your operating system(s) (default: linux)
5
-os:
6
-  - linux
7
-  - osx
8
-
9
-# Bioconductor-specific stuff. If targetting bioc-devel then need to use
10
-# "bioc_use_devel: true", otherwise remove or comment-out this line.
11
-# TODO: Should --timings be added to r_check_args to mimic BioC build machines?
2
+r:
3
+  - devel
4
+sudo: false
5
+cache: packages
12 6
 bioc_required: true
13
-bioc_use_devel: true
14
-# On CRAN all warnings are treated as errors; this is not true on Bioconductor.
15 7
 warnings_are_errors: false
16
-
17
-# Need this if also using covr to get code coverage
8
+# Used for code coverage testing with covr and codecov
18 9
 r_github_packages:
19 10
   - jimhester/covr
20 11
 after_success:
21 12
   - Rscript -e 'covr::codecov()'
22
-
23
-# bsseq-specific stuff
24
-r_build_args: "--no-build-vignettes --no-manual"
25
-# TODO: Travis is reporting an error when checking the vignette, hence the
26
-# inclusion of --no-vignettes in r_check_args
27
-r_check_args: "--no-vignettes"
28
-# Must 'manually' install packages in SUGGESTS in order to run full tests
29
-r_binary_packages:
30
-  - RUnit
31
-bioc_packages:
32
-  - bsseqData
33
-  - BiocStyle
... ...
@@ -1,5 +1,5 @@
1 1
 Package: bsseq
2
-Version: 1.7.7
2
+Version: 1.7.13
3 3
 Title: Analyze, manage and store bisulfite sequencing data
4 4
 Description: A collection of tools for analyzing and visualizing bisulfite
5 5
     sequencing data.
... ...
@@ -7,7 +7,7 @@ Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("aut", "cre"),
7 7
     email = "kasperdanielhansen@gmail.com"),
8 8
     person("Peter", "Hickey", role = "ctb", email = "peter.hickey@gmail.com"))
9 9
 Depends:
10
-    R (>= 2.15),
10
+    R (>= 3.3),
11 11
     methods,
12 12
     BiocGenerics,
13 13
     GenomicRanges (>= 1.23.7),
... ...
@@ -26,7 +26,7 @@ Imports:
26 26
     data.table,
27 27
     S4Vectors,
28 28
     R.utils (>= 2.0.0),
29
-    matrixStats,
29
+    matrixStats (>= 0.50.0),
30 30
     permute
31 31
 Suggests:
32 32
     RUnit,
... ...
@@ -68,7 +68,7 @@ export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats",
68 68
        "read.umtab", "read.umtab2", "read.bsmooth", "read.bismark",
69 69
        "poissonGoodnessOfFit", "binomialGoodnessOfFit",
70 70
        "data.frame2GRanges", "BSseqTstat",
71
-       "BSseqStat", "BSmooth.fstat", "smoothSds", "computeStat")
71
+       "BSseqStat")
72 72
 
73 73
 S3method("print", "chisqGoodnessOfFit")
74 74
 S3method("plot", "chisqGoodnessOfFit")
... ...
@@ -1,10 +1,10 @@
1
-BSmooth.fstat <- function(BSseq, design, contrasts, returnModelCoefficients = FALSE, verbose = TRUE){
1
+BSmooth.fstat <- function(BSseq, design, contrasts, verbose = TRUE){
2 2
     stopifnot(is(BSseq, "BSseq"))
3 3
     stopifnot(hasBeenSmoothed(BSseq))
4
-        
4
+
5 5
     ## if(any(rowSums(getCoverage(BSseq)[, unlist(groups)]) == 0))
6 6
     ##     warning("Computing t-statistics at locations where there is no data; consider subsetting the 'BSseq' object first")
7
-    
7
+
8 8
     if(verbose) cat("[BSmooth.fstat] fitting linear models ... ")
9 9
     ptime1 <- proc.time()
10 10
     allPs <- getMeth(BSseq, type = "smooth", what = "perBase",
... ...
@@ -30,9 +30,6 @@ BSmooth.fstat <- function(BSseq, design, contrasts, returnModelCoefficients = FA
30 30
     stats <- list(rawSds = rawSds,
31 31
                   cor.coefficients = cor.coefficients,
32 32
                   rawTstats = rawTstats)
33
-    if(returnModelCoefficients) {
34
-        stats$modelCoefficients <- fit$coefficients
35
-    }
36 33
     out <- BSseqStat(gr = granges(BSseq),
37 34
                      stats = stats, parameters = parameters)
38 35
     out
... ...
@@ -69,7 +66,8 @@ smoothSds <- function(BSseqStat, k = 101, qSd = 0.75, mc.cores = 1,
69 66
     BSseqStat
70 67
 }
71 68
 
72
-
69
+# Quieten R CMD check
70
+globalVariables("tstat")
73 71
 computeStat <- function(BSseqStat, coef = NULL) {
74 72
     stopifnot(is(BSseqStat, "BSseqStat"))
75 73
     if(is.null(coef)) {
... ...
@@ -140,11 +138,10 @@ fstat.pipeline <- function(BSseq, design, contrasts, cutoff, fac, nperm = 1000,
140 138
                            coef = NULL, maxGap.sd = 10 ^ 8, maxGap.dmr = 300,
141 139
                            mc.cores = 1) {
142 140
     bstat <- BSmooth.fstat(BSseq = BSseq, design = design,
143
-                           contrasts = contrasts,
144
-                           returnModelCoefficients = TRUE)
141
+                           contrasts = contrasts)
145 142
     bstat <- smoothSds(bstat)
146 143
     bstat <- computeStat(bstat, coef = coef)
147
-    dmrs <- dmrFinder(bstat, cutoff = cutoff)
144
+    dmrs <- dmrFinder(bstat, cutoff = cutoff, maxGap = maxGap.dmr)
148 145
     idxMatrix <- permuteAll(nperm, design)
149 146
     nullDist <- getNullDistribution_BSmooth.fstat(BSseq = BSseq,
150 147
                                                   idxMatrix = idxMatrix,
... ...
@@ -164,3 +161,38 @@ fstat.pipeline <- function(BSseq, design, contrasts, cutoff, fac, nperm = 1000,
164 161
     list(bstat = bstat, dmrs = dmrs, idxMatrix = idxMatrix, nullDist = nullDist)
165 162
 }
166 163
 
164
+fstat.comparisons.pipeline <- function(BSseq, design, contrasts, cutoff, fac,
165
+                                       maxGap.sd = 10 ^ 8, maxGap.dmr = 300,
166
+                                       verbose = TRUE) {
167
+    bstat <- BSmooth.fstat(BSseq = BSseq,
168
+                           design = design,
169
+                           contrasts = contrasts,
170
+                           verbose = verbose)
171
+    bstat <- smoothSds(bstat, maxGap = maxGap.sd, verbose = verbose)
172
+    # NOTE: Want to keep the bstat object corresponding to the original fstat
173
+    #       and not that from the last t-tsts in the following lapply()
174
+    bstat_f <- bstat
175
+    if (verbose) {
176
+        cat(paste0("[fstat.comparisons.pipeline] making ", ncol(contrasts),
177
+                   " comparisons ... \n"))
178
+    }
179
+    l <- lapply(seq_len(ncol(contrasts)), function(coef) {
180
+        if (verbose) {
181
+            cat(paste0("[fstat.comparisons.pipeline] ",
182
+                       colnames(contrasts)[coef], "\n"))
183
+        }
184
+        bstat <- computeStat(bstat, coef = coef)
185
+        dmrs <- dmrFinder(bstat, cutoff = cutoff, maxGap = maxGap.dmr,
186
+                          verbose = verbose)
187
+        if (!is.null(dmrs)) {
188
+            meth <- getMeth(BSseq, dmrs, what = "perRegion")
189
+            meth <- t(apply(meth, 1, function(xx) tapply(xx, fac, mean)))
190
+            dmrs <- cbind(dmrs, meth)
191
+            dmrs$maxDiff <- rowMaxs(meth) - rowMins(meth)
192
+        }
193
+        dmrs
194
+    })
195
+    names(l) <- colnames(contrasts)
196
+    list(bstat = bstat, dmrs = l)
197
+}
198
+
... ...
@@ -78,7 +78,7 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire
78 78
     ptime1 <- proc.time()
79 79
     switch(estimate.var,
80 80
            "group2" = {
81
-               rawSds <- rowSds(allPs[, group2, drop = FALSE], na.rm = TRUE)
81
+               rawSds <- rowSds(allPs, cols = group2, na.rm = TRUE)
82 82
                smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) {
83 83
                    smoothSd(rawSds[idx], k = k)
84 84
                }, mc.cores = mc.cores))
... ...
@@ -86,8 +86,8 @@ BSmooth.tstat <- function(BSseq, group1, group2, estimate.var = c("same", "paire
86 86
                tstat.sd <- smoothSds * scale
87 87
            },
88 88
            "same" = {
89
-               rawSds <- sqrt( ((length(group1) - 1) * rowVars(allPs[, group1, drop = FALSE]) +
90
-                                (length(group2) - 1) * rowVars(allPs[, group2, drop = FALSE])) /
89
+               rawSds <- sqrt( ((length(group1) - 1) * rowVars(allPs, cols = group1) +
90
+                                (length(group2) - 1) * rowVars(allPs, cols = group2)) /
91 91
                               (length(group1) + length(group2) - 2))
92 92
                smoothSds <- do.call(c, mclapply(clusterIdx, function(idx) {
93 93
                    smoothSd(rawSds[idx], k = k)
... ...
@@ -3,6 +3,7 @@ read.bismark <- function(files,
3 3
                          rmZeroCov = FALSE,
4 4
                          strandCollapse = TRUE,
5 5
                          fileType = c("cov", "oldBedGraph", "cytosineReport"),
6
+                         mc.cores = 1,
6 7
                          verbose = TRUE) {
7 8
     ## Argument checking
8 9
     if (anyDuplicated(files)) {
... ...
@@ -15,10 +16,18 @@ read.bismark <- function(files,
15 16
     if (verbose) {
16 17
         message(paste0("Assuming file type is ", fileType))
17 18
     }
19
+    if (strandCollapse && (fileType == "cov" || fileType == "oldBedGraph")) {
20
+        stop("'strandCollapse' must be 'FALSE' if 'fileType' is '", fileType, "'")
21
+    }
22
+    ## SummarizedExperiment validator makes calls to identical() that will fail
23
+    ## due to how sampleNames are propagated sometimes with and without names().
24
+    ## To make life simpler, we simply strip the names()
25
+    sampleNames <- unname(sampleNames)
26
+
18 27
     ## Process each file
19 28
     idxes <- seq_along(files)
20 29
     names(idxes) <- sampleNames
21
-    allOut <- lapply(idxes, function(ii) {
30
+    allOut <- mclapply(idxes, function(ii) {
22 31
         if (verbose) {
23 32
             cat(sprintf("[read.bismark] Reading file '%s' ... ", files[ii]))
24 33
         }
... ...
@@ -33,7 +42,7 @@ read.bismark <- function(files,
33 42
                                                  rmZeroCov = rmZeroCov,
34 43
                                                  keepContext = FALSE)
35 44
         }
36
-        if (strandCollapse && !all(runValue(strand(out)) == "*")) {
45
+        if (strandCollapse) {
37 46
             out <- strandCollapse(out)
38 47
         }
39 48
         ptime2 <- proc.time()
... ...
@@ -42,7 +51,7 @@ read.bismark <- function(files,
42 51
             cat(sprintf("done in %.1f secs\n", stime))
43 52
         }
44 53
         out
45
-    })
54
+    }, mc.cores = mc.cores)
46 55
 
47 56
     if (verbose) {
48 57
         cat(sprintf("[read.bismark] Joining samples ... "))
... ...
@@ -121,5 +130,6 @@ read.bismarkCytosineReportRaw <- function(thisfile,
121 130
     ## Create BSseq instance from 'out'
122 131
     BSseq(gr = gr, sampleNames = thisSampleName,
123 132
           M = as.matrix(out[[4L]]),
124
-          Cov = as.matrix(out[[4L]] + out[[5L]]))
133
+          Cov = as.matrix(out[[4L]] + out[[5L]]),
134
+          rmZeroCov = rmZeroCov)
125 135
 }
... ...
@@ -9,6 +9,15 @@ test_read.bismark_coverage <- function() {
9 9
                           fileType = "cov",
10 10
                           verbose = FALSE)
11 11
     checkTrue(is(bsseq, "BSseq"))
12
+    # Regression check
13
+    # Should not fail if sampleNames have names()
14
+    bsseq <- read.bismark(files = infile,
15
+                          sampleNames = c(test = "test_data"),
16
+                          rmZeroCov = FALSE,
17
+                          strandCollapse = FALSE,
18
+                          fileType = "cov",
19
+                          verbose = FALSE)
20
+    checkTrue(is(bsseq, "BSseq"))
12 21
 
13 22
     # Should also work because the "cov" fileType is the same as the
14 23
     # "oldBedGraph" fileType
15 24
new file mode 100644
... ...
@@ -0,0 +1,62 @@
1
+\name{BSmooth.fstat}
2
+\alias{BSmooth.fstat}
3
+\title{
4
+    Compute F-statistics based on smoothed whole-genome bisulfite
5
+    sequencing data.
6
+}
7
+\description{
8
+    Compute F-statistics based on smoothed whole-genome bisulfite
9
+    sequencing data.
10
+}
11
+\usage{
12
+BSmooth.fstat(BSseq, design, contrasts, verbose = TRUE)
13
+}
14
+\arguments{
15
+    \item{BSseq}{An object of class \code{BSseq}.}
16
+    \item{design}{The design matrix of the bisulfite-sequencing experiment,
17
+        with rows corresponding to arrays and columns to coefficients to be
18
+        estimated.}
19
+    \item{contrasts}{Numeric matrix with rows corresponding to columns in
20
+        \code{design} and columns containing contrasts. May be a vector if
21
+        there is only one contrast.}
22
+    \item{verbose}{Should the function be verbose?}
23
+}
24
+\details{
25
+    \strong{TODO}
26
+}
27
+\value{
28
+    An object of class \linkS4class{BSseqStat}.
29
+}
30
+\author{
31
+    Kasper Daniel Hansen \email{khansen@jhsph.edu}
32
+}
33
+\seealso{
34
+    \code{\link{BSmooth}} for the input object and
35
+    \linkS4class{BSseq} for its class.
36
+    \linkS4class{BSseqTstat} describes the return class. This
37
+    function is likely to be followed by the use of
38
+    \code{\link{dmrFinder}}.
39
+}
40
+\examples{
41
+  \dontrun{
42
+    if(require(bsseqData)) {
43
+        data(keepLoci.ex)
44
+        data(BS.cancer.ex.fit)
45
+        BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit)
46
+        ## Remember to subset the BSseq object, see vignette for explanation
47
+        ## TODO: Kind of a forced example
48
+        design <- model.matrix(~0 + BS.cancer.ex.fit$Type)
49
+        colnames(design) <- gsub("BS\\\.cancer\\\.ex\\\.fit\\\$Type", "",
50
+                                 colnames(design))
51
+        contrasts <- makeContrasts(
52
+            cancer_vs_normal = cancer - normal,
53
+            levels = design
54
+        )
55
+        BS.stat <- BSmooth.fstat(BS.cancer.ex.fit[keepLoci.ex,],
56
+                                  design,
57
+                                  contrasts)
58
+        BS.stat
59
+    }
60
+  }
61
+}
62
+\keyword{internal}
0 63
new file mode 100644
... ...
@@ -0,0 +1,64 @@
1
+\name{BSseqStat-class}
2
+\Rdversion{1.1}
3
+\docType{class}
4
+\alias{BSseqStat}
5
+\alias{BSseqStat-class}
6
+\alias{[,BSseqStat-method}
7
+\alias{[,BSseqStat,ANY-method}
8
+\alias{show,BSseqStat-method}
9
+\title{Class BSseqStat}
10
+\description{
11
+    A class for representing statistics for smoothed whole-genome
12
+    bisulfite sequencing data.
13
+}
14
+\usage{
15
+    BSseqStat(gr = NULL, stats = NULL, parameters = NULL)
16
+}
17
+\arguments{
18
+    \item{gr}{The genomic locations as an object of class \code{GRanges}.}
19
+    \item{stats}{The statistics, as a matrix.}
20
+    \item{parameters}{A list of parameters.}
21
+}
22
+\section{Objects from the Class}{
23
+    Objects can be created by calls of the form \code{BSseqStat(...)}.
24
+    However, usually objects are returned by \code{BSmooth.fstat(...)} and
25
+    not constructed by the user.
26
+}
27
+\section{Slots}{
28
+    \describe{
29
+        \item{\code{stats}:}{This is a matrix with columns representing
30
+            various statistics for methylation loci along the genome.}
31
+        \item{\code{parameters}:}{Object of class \code{list}.  A list of
32
+            parameters representing how the statistics were computed.}
33
+        \item{\code{gr}:}{Object of class \code{GRanges} giving genomic
34
+            locations.}
35
+    }
36
+}
37
+\section{Extends}{
38
+    Class \code{\linkS4class{hasGRanges}}, directly.
39
+}
40
+\section{Methods}{
41
+    \describe{
42
+        \item{[}{The subsetting operator; one may only subset in one
43
+            dimension, corresponding to methylation loci.}
44
+        \item{show}{The show method.}
45
+    }
46
+}
47
+\section{Utilities}{
48
+    This class extends \code{hasGRanges} and therefore inherits a number
49
+    of useful \code{GRanges} methods that operate on the \code{gr} slot,
50
+    used for accessing and setting the genomic locations and also do
51
+    \code{subsetByOverlaps}.
52
+}
53
+\author{
54
+    Kasper Daniel Hansen \email{khansen@jhsph.edu}
55
+}
56
+\seealso{
57
+    \code{\linkS4class{hasGRanges}} for accessing the genomic locations.
58
+    \code{\link{BSmooth.fstat}} for a function
59
+    that returns objects of class \code{BSseqStat}, and \code{\link{smoothSds}},
60
+    \code{\link{computeStat}} and \code{\link{dmrFinder}}
61
+    for functions that operate based on these statistics.  Also see
62
+    the more specialised \code{\linkS4class{BSseqTstat}}.
63
+}
64
+\keyword{classes}
... ...
@@ -31,7 +31,7 @@ not constructed by the user..
31 31
     \item{\code{parameters}:}{Object of class \code{list}.  A list of
32 32
       parameters representing how the t-statistics were computed.}
33 33
     \item{\code{gr}:}{Object of class \code{GRanges} giving genomic
34
-      locations.} 
34
+      locations.}
35 35
   }
36 36
 }
37 37
 \section{Extends}{
... ...
@@ -48,7 +48,7 @@ Class \code{\linkS4class{hasGRanges}}, directly.
48 48
   This class extends \code{hasGRanges} and therefore inherits a number
49 49
   of useful \code{GRanges} methods that operate on the \code{gr} slot,
50 50
   used for accessing and setting the genomic locations and also do
51
-  \code{subsetByOverlaps}. 
51
+  \code{subsetByOverlaps}.
52 52
   }
53 53
 \author{
54 54
   Kasper Daniel Hansen \email{khansen@jhsph.edu}
... ...
@@ -56,7 +56,7 @@ Class \code{\linkS4class{hasGRanges}}, directly.
56 56
 \seealso{
57 57
   The package vignette(s).  \code{\linkS4class{hasGRanges}} for accessing
58 58
   the genomic locations.  \code{\link{BSmooth.tstat}} for a function
59
-  that objects of class \code{BSseqTstat}, and \code{\link{dmrFinder}}
59
+  that returns objects of class \code{BSseqTstat}, and \code{\link{dmrFinder}}
60 60
   for a function that computes DMRs based on the t-statistics.  Also see
61 61
   \code{\link[bsseqData]{BS.cancer.ex.tstat}} for an example of the
62 62
   class in the \pkg{bsseqData} package.
63 63
new file mode 100644
... ...
@@ -0,0 +1,66 @@
1
+\name{computeStat}
2
+\alias{computeStat}
3
+\title{
4
+    Compute a test statistic based on smoothed whole-genome bisulfite
5
+    sequencing data.
6
+ }
7
+\description{
8
+    Compute a test statistic based on smoothed whole-genome bisulfite
9
+    sequencing data.
10
+ }
11
+\usage{
12
+    computeStat(BSseqStat, coef = NULL)
13
+}
14
+
15
+\arguments{
16
+    \item{BSseqStat}{An object of class \code{BSseqStat}, typically an object
17
+        returned by \code{\link{smoothSds}(...)} and not constructed by
18
+        the user.}
19
+    \item{coef}{A vector indicating for which coefficients the statistic is to
20
+        be computed (\code{coef = NULL} corresponds to testing all
21
+        coefficients). If the length of the \code{coef} is 1 then the
22
+        corresponding t-statistic is computed, otherwise the corresponding
23
+        F-statistic is computed.}
24
+}
25
+\details{
26
+    \strong{TODO}
27
+}
28
+\value{
29
+  An object of class \linkS4class{BSseqStat}. More speciically, the input
30
+  \linkS4class{BSseqStat} object with the computed statistics added to the
31
+  \code{stats} slot (accessible with \code{\link{getStats}}).
32
+}
33
+\author{
34
+  Kasper Daniel Hansen \email{khansen@jhsph.edu}
35
+}
36
+
37
+\seealso{
38
+  \code{\link{smoothSds}} for the function to create the appropriate
39
+  \code{\linkS4class{BSseqStat}} input object.
40
+  \code{\linkS4class{BSseqStat}} also describes the return class.  This
41
+  function is likely to be followed by the use of \code{\link{dmrFinder}}.}
42
+\examples{
43
+  \dontrun{
44
+    if(require(bsseqData)) {
45
+        data(keepLoci.ex)
46
+        data(BS.cancer.ex.fit)
47
+        BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit)
48
+        ## Remember to subset the BSseq object, see vignette for explanation
49
+        ## TODO: Kind of a forced example
50
+        design <- model.matrix(~0 + BS.cancer.ex.fit$Type)
51
+        colnames(design) <- gsub("BS\\\.cancer\\\.ex\\\.fit\\\$Type", "",
52
+                                 colnames(design))
53
+        contrasts <- makeContrasts(
54
+            cancer_vs_normal = cancer - normal,
55
+            levels = design
56
+        )
57
+        BS.stat <- BSmooth.fstat(BS.cancer.ex.fit[keepLoci.ex,],
58
+                                  design,
59
+                                  contrasts)
60
+        BS.stat <- smoothSds(BS.stat)
61
+        BS.stat <- computeStat(BS.stat)
62
+        BS.stat
63
+    }
64
+  }
65
+}
66
+\keyword{internal}
... ...
@@ -11,6 +11,7 @@
11 11
              rmZeroCov = FALSE,
12 12
              strandCollapse = TRUE,
13 13
              fileType = c("cov", "oldBedGraph", "cytosineReport"),
14
+             mc.cores = 1,
14 15
              verbose = TRUE)
15 16
 }
16 17
 \arguments{
... ...
@@ -22,8 +23,13 @@
22 23
     all samples be removed. This will result in a much smaller object if
23 24
     the data originates from (targeted) capture bisulfite sequencing.}
24 25
   \item{strandCollapse}{Should strand-symmetric methylation loci, e.g., CpGs,
25
-    be collapsed across strands.}
26
+    be collapsed across strands. This option is only available if
27
+    \code{fileType = "cytosineReport"} since the other file types do not
28
+    contain the necessary strand information.}
26 29
   \item{fileType}{The format of the input file; see Note for details.}
30
+  \item{mc.cores}{The number of cores used.  Note that setting
31
+    \code{mc.cores} to a value greater than 1 is not  supported on MS
32
+    Windows, see the help page for \code{mclapply}.}
27 33
   \item{verbose}{Make the function verbose.}
28 34
 }
29 35
 \note{
... ...
@@ -56,7 +62,7 @@
56 62
   There is no standard file extension for this file. The "\code{C-context}" and
57 63
   "\code{trinucleotide context}" columns are not currently used by bsseq.
58 64
 
59
-  The following is an incomplete list of issues to be aware of when using
65
+  The following is a list of some issues to be aware of when using
60 66
   output from Bismark's methylation extractor:
61 67
 
62 68
   \itemize{
... ...
@@ -75,7 +81,9 @@
75 81
     Furthermore, the default co-ordinate system varies by version of Bismark.
76 82
     bsseq makes no assumptions about the basis of the genomic co-ordinates and
77 83
     it is left to the user to ensure that the appropriate basis is used in the
78
-    analysis of their data.
84
+    analysis of their data. Since Bioconductor packages and
85
+    \code{\linkS4class{GRanges}} use one-based co-ordinates, it
86
+    is recommended that your Bismark files are also one-based.
79 87
   }
80 88
 }
81 89
 \value{
... ...
@@ -96,7 +104,7 @@
96 104
   bismarkBSseq <- read.bismark(files = infile,
97 105
                                sampleNames = "test_data",
98 106
                                rmZeroCov = FALSE,
99
-                               strandCollapse = TRUE,
107
+                               strandCollapse = FALSE,
100 108
                                fileType = "cov",
101 109
                                verbose = TRUE)
102 110
   bismarkBSseq
103 111
new file mode 100644
... ...
@@ -0,0 +1,73 @@
1
+\name{smoothSds}
2
+\alias{smoothSds}
3
+\title{
4
+    Smooth the standard deviations using a thresholded running mean based on
5
+    smoothed whole-genome bisulfite sequencing data.
6
+}
7
+\description{
8
+    Smooth the standard deviations using a thresholded running mean based on
9
+    smoothed whole-genome bisulfite sequencing data.
10
+}
11
+\usage{
12
+    smoothSds(BSseqStat, k = 101, qSd = 0.75, mc.cores = 1, maxGap = 10^8,
13
+              verbose = TRUE)
14
+}
15
+
16
+\arguments{
17
+    \item{BSseqStat}{An object of class \code{BSseqStat}, typically an object
18
+        returned by \code{\link{BSmooth.fstat}(...)} and not constructed by
19
+        the user.}
20
+    \item{k}{A positive scalar, see details.}
21
+    \item{qSd}{A scalar between 0 and 1, see details.}
22
+    \item{mc.cores}{The number of cores used.  Note that setting
23
+        \code{mc.cores} to a value greater than 1 is not supported on MS
24
+        Windows, see the help page for \code{mclapply}.}
25
+    \item{maxGap}{A scalar greater than 0, see details.}
26
+    \item{verbose}{Should the function be verbose?}
27
+}
28
+\details{
29
+    The standard deviation estimates are smoothed using a running mean with a
30
+    width of \code{k} and thresholded using \code{qSd} which sets the minimum
31
+    standard deviation to be the \code{qSd}-quantile.
32
+}
33
+\value{
34
+    An object of class \linkS4class{BSseqStat}. More speciically, the input
35
+    \linkS4class{BSseqStat} object with the computed statistics added to the
36
+    \code{stats} slot (accessible with \code{\link{getStats}}).
37
+}
38
+\author{
39
+    Kasper Daniel Hansen \email{khansen@jhsph.edu}
40
+}
41
+
42
+\seealso{
43
+    \code{\link{BSmooth.fstat}} for the function to create the appropriate
44
+    \code{\linkS4class{BSseqStat}} input object.
45
+    \code{\linkS4class{BSseqStat}} also describes the return class.  This
46
+    function is likely to be followed by the use of \code{\link{computeStat}}.}
47
+\examples{
48
+  \dontrun{
49
+    if(require(bsseqData)) {
50
+        data(keepLoci.ex)
51
+        data(BS.cancer.ex.fit)
52
+        BS.cancer.ex.fit <- updateObject(BS.cancer.ex.fit)
53
+        ## Remember to subset the BSseq object, see vignette for explanation
54
+        ## TODO: Kind of a forced example
55
+        design <- model.matrix(~0 + BS.cancer.ex.fit$Type)
56
+        colnames(design) <- gsub("BS\\\.cancer\\\.ex\\\.fit\\\$Type", "",
57
+                                 colnames(design))
58
+        contrasts <- makeContrasts(
59
+            cancer_vs_normal = cancer - normal,
60
+            levels = design
61
+        )
62
+        BS.stat <- BSmooth.fstat(BS.cancer.ex.fit[keepLoci.ex,],
63
+                                 design,
64
+                                 contrasts)
65
+        BS.stat <- smoothSds(BS.stat)
66
+        ## Comparing the raw standard deviations to the smoothed standard
67
+        ## deviations
68
+        summary(getStats(BS.stat, what = "rawSds"))
69
+        summary(getStats(BS.stat, what = "smoothSds"))
70
+    }
71
+  }
72
+}
73
+\keyword{internal}
... ...
@@ -158,7 +158,7 @@ We also allow for easy computation of Fisher's exact test for each loci, using t
158 158
 
159 159
 Objects of class \Rcode{BSseq} contains a \Rcode{GRanges} object with the genomic locations.  This
160 160
 \Rcode{GRanges} object can be obtained by \Rcode{granges}.  A number of standard \Rcode{GRanges}
161
-methods works directly on the \R.code{BSseq} object, such as \Rcode{start}, \Rcode{end},
161
+methods works directly on the \Rcode{BSseq} object, such as \Rcode{start}, \Rcode{end},
162 162
 \Rcode{seqnames} (chromosome names) etc.
163 163
 
164 164
 These objects also contains a \Rcode{phenoData} object for sample pheno data.  Useful methods are
... ...
@@ -201,7 +201,7 @@ Even in comparisons where we do not observe these large-scale methylation differ
201 201
 
202 202
 Once t-statistics have been computed, we can compute differentially methylated regions (DMRs) by
203 203
 thresholding the t-statistics.  Here we use a cutoff of $4.6$, which was chosen by looking at the
204
-quantiles of the t-statistics (for the entire genome).
204
+quantiles of the t-statistics (for the entire genome)\footnote{See \url{https://support.bioconductor.org/p/78227/} for further discussion on the choice of cutoff. }.
205 205
 <<dmrs>>=
206 206
 dmrs0 <- dmrFinder(BS.cancer.ex.tstat, cutoff = c(-4.6, 4.6))
207 207
 dmrs <- subset(dmrs0, n >= 3 & abs(meanDiff) >= 0.1)