Browse code

Commit made by the Bioconductor Git-SVN bridge.

Commit id: 3e3c4a483adc9ba1e5d101a35f9a0fef41d2b78b

updated vignettes to use BiocStyle


Commit id: fe749c1df8a2a39ca6c4e1cc3b2358195f307631

updating read.bismark


Commit id: c50eb68c8e7f1be51af4ff411c95064849b52d15

Fixing Description; updating bsseq.Rnw to use BiocStyle


Commit id: f38a77394f0248aae1a6a4e27102610a53ca892e

resaved with compress=xz


Commit id: 2dcedd4fead08fe0af2a0afb7de2c89f4818b962

added Raw parsing of cytosine report files from Bismark



git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@104827 bc3139a8-67e5-0310-9ffc-ced21a209358

khansen authored on 11/06/2015 14:04:52
Showing 10 changed files

... ...
@@ -1,14 +1,14 @@
1 1
 Package: bsseq
2
-Version: 1.5.2
3
-Title: Analyze, manage and store bisulfite sequencing data.
4
-Description: Tools for analyzing and visualizing bisulfite sequencing data
2
+Version: 1.5.3
3
+Title: Analyze, manage and store bisulfite sequencing data
4
+Description: A collection of tools for analyzing and visualizing bisulfite sequencing data.
5 5
 Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("aut", "cre"),
6 6
                     email = "kasperdanielhansen@gmail.com"))
7
-Depends: R (>= 2.15), methods, BiocGenerics, S4Vectors, IRanges (>= 2.1.10),
7
+Depends: R (>= 2.15), methods, BiocGenerics, IRanges (>= 2.1.10),
8 8
         GenomicRanges (>= 1.19.6), SummarizedExperiment (>= 0.1.1), parallel,
9 9
         matrixStats, GenomeInfoDb
10
-Imports: scales, stats, graphics, Biobase, locfit, gtools
11
-Suggests: RUnit, bsseqData
10
+Imports: scales, stats, graphics, Biobase, locfit, gtools, data.table, S4Vectors
11
+Suggests: RUnit, bsseqData, BiocStyle
12 12
 Collate: hasGRanges.R BSseq_class.R BSseqTstat_class.R BSseq_utils.R combine.R 
13 13
         utils.R read.bsmooth.R read.bismark.R BSmooth.R BSmooth.tstat.R dmrFinder.R
14 14
         gof_stats.R plotting.R fisher.R permutations.R
... ...
@@ -29,6 +29,7 @@ importMethodsFrom(GenomeInfoDb, "seqlengths", "seqlengths<-", "seqinfo", "seqinf
29 29
                   "seqnames", "seqnames<-", "seqlevels", "seqlevels<-")
30 30
 import(S4Vectors)
31 31
 importFrom(gtools, "combinations")
32
+importFrom(data.table, "fread", "setnames")
32 33
 
33 34
 ##
34 35
 ## Exporting
... ...
@@ -58,7 +59,7 @@ exportMethods("[", "show",
58 59
 export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats",
59 60
        "collapseBSseq", "orderBSseq", "hasBeenSmoothed", "chrSelectBSseq",
60 61
        "BSmooth", "BSmooth.tstat", "dmrFinder", "fisherTests",
61
-       "combineList",
62
+       "combineList", "strandCollapse",
62 63
        "plotRegion", "plotManyRegions", 
63 64
        "read.umtab", "read.umtab2", "read.bsmooth", "read.bismark",
64 65
        "poissonGoodnessOfFit", "binomialGoodnessOfFit",
... ...
@@ -228,8 +228,24 @@ setMethod("updateObject", "BSseq",
228 228
            })
229 229
 
230 230
 
231
-
232
-                  
231
+strandCollapse <- function(BSseq, shift = TRUE) {
232
+    if(all(runValue(strand(BSseq)) == "*")) {
233
+        warning("All loci are unstranded; nothing to collapse")
234
+        return(BSseq)
235
+    }
236
+    if(!(all(runValue(strand(BSseq)) %in% c("+", "-"))))
237
+        stop("'BSseq' object has a mix of stranded and unstranded loci.")
238
+    BS.forward <- BSseq[strand(BSseq) == "+"]
239
+    strand(BS.forward) <- "*"
240
+    BS.reverse <- BSseq[strand(BSseq) == "-"]
241
+    strand(BS.reverse) <- "*"
242
+    if(shift)
243
+        rowRanges(BS.reverse) <- shift(granges(BS.reverse), -1L)
244
+    sampleNames(BS.reverse) <- paste0(sampleNames(BS.reverse), "_REVERSE")
245
+    BS.comb <- combine(BS.forward, BS.reverse)
246
+    newBSseq <- collapseBSseq(BS.comb, columns = rep(sampleNames(BSseq), 2))
247
+    newBSseq
248
+}
233 249
 
234 250
 ## getD <- function(data, sample1, sample2, type = c("raw", "fit"),
235 251
 ##                  addPositions = FALSE, alpha = 0.95) {
... ...
@@ -41,7 +41,8 @@ getNullDmrs <- function(BSseq, idxMatrix1, idxMatrix2, estimate.var = "same",
41 41
 subsetDmrs <- function(xx) {
42 42
     if(is.null(xx) || is(xx, "try-error"))
43 43
         return(NULL)
44
-    out <- subset(xx, n >= 3 & abs(meanDiff) > 0.1 & invdensity <= 300)
44
+    out <- xx[ xx[,"n"] >= 3 & abs(xx[, "meanDiff"]) > 0.1 &
45
+                  xx[, "invdensity"] <= 300, ]
45 46
     if(nrow(out) == 0)
46 47
         return(NULL)
47 48
     out
... ...
@@ -1,4 +1,5 @@
1
-read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, verbose = TRUE){
1
+read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, strandCollapse = TRUE,
2
+                         fileType = c("cytosineReport", "oldBedGraph"), verbose = TRUE){
2 3
     ## Argument checking
3 4
     if (anyDuplicated(files)){
4 5
         stop("duplicate entries in 'files'")
... ...
@@ -6,6 +7,7 @@ read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, verbose = TRUE){
6 7
     if (length(sampleNames) != length(files) | anyDuplicated(sampleNames)){
7 8
         stop("argument 'sampleNames' has to have the same length as argument 'files', without duplicate entries")
8 9
     }
10
+    fileType <- match.arg(fileType)
9 11
     ## Process each file
10 12
     idxes <- seq_along(files)
11 13
     names(idxes) <- sampleNames
... ...
@@ -14,12 +16,19 @@ read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, verbose = TRUE){
14 16
             cat(sprintf("[read.bismark] Reading file '%s' ... ", files[ii]))
15 17
         }
16 18
         ptime1 <- proc.time()
17
-        raw <- read.bismarkFileRaw(thisfile = files[ii])
19
+        if(fileType == "oldBedGraph") {
20
+            raw <- read.bismarkFileRaw(thisfile = files[ii])
21
+        }
22
+        if(fileType == "cytosineReport") {
23
+            raw <- read.bismarkCytosineRaw(thisfile = files[ii], keepContext = FALSE)
24
+        }
18 25
         M <- matrix(mcols(raw)[, "mCount"], ncol = 1)
19 26
         Cov <- M + mcols(raw)[, "uCount"]
20 27
         mcols(raw) <- NULL
21 28
         out <- BSseq(gr = raw, M = M, Cov = Cov,
22 29
                      sampleNames = sampleNames[ii], rmZeroCov = rmZeroCov)
30
+        if(strandCollapse && !all(runValue(strand(out)) == "*"))
31
+            out <- strandCollapse(out)
23 32
         ptime2 <- proc.time()
24 33
         stime <- (ptime2 - ptime1)[3]
25 34
         if (verbose) {
... ...
@@ -64,3 +73,26 @@ read.bismarkFileRaw <- function(thisfile, verbose = TRUE){
64 73
     mcols(gr) <- df
65 74
     gr
66 75
 }
76
+
77
+read.bismarkCytosineRaw <- function(thisfile, keepContext = FALSE) {
78
+    out <- fread(thisfile)
79
+    if(length(out) != 6 && length(out) != 7)
80
+        stop("unknown file format")
81
+    if(length(out) == 6) {
82
+        setnames(out, c("chr", "start", "end", "methPerc", "mCount", "uCount"))
83
+        gr <- GRanges(seqnames = out[["chr"]],
84
+                      ranges = IRanges(start = out[["start"]], width = 1),
85
+                      mCount = out[["mCount"]], uCount = out[["uCount"]])
86
+    }
87
+    if(length(out) == 7) {
88
+        setnames(out, c("chr", "start", "strand", "mCount", "uCount", "type", "context"))
89
+        gr <- GRanges(seqnames = out[["chr"]], strand = out[["strand"]],
90
+                      ranges = IRanges(start = out[["start"]], width = 1),
91
+                      mCount = out[["mCount"]], uCount = out[["uCount"]])
92
+        if(keepContext) {
93
+            gr$type <- out[["type"]]
94
+            gr$context <- out[["context"]]
95
+        }                      
96
+    }
97
+    gr
98
+}
67 99
Binary files a/data/BS.chr22.rda and b/data/BS.chr22.rda differ
... ...
@@ -2,9 +2,18 @@
2 2
 \title{bsseq news}
3 3
 \encoding{UTF-8}
4 4
 
5
+\section{Version 1.5.x}{
6
+  \itemize{
7
+    \item new function strandCollapse for collapsing forward and reverse
8
+    strand data to be unstranded.
9
+    \item Updated read.bismark to support the cytosine report files;
10
+    both formats are support.
11
+  }
12
+}
13
+
5 14
 \section{Version 0.11.x}{
6 15
   \itemize{
7
-    \item Converted to using Authors@R in the DESCRIPTIOn file.
16
+    \item Converted to using Authors@R in the DESCRIPTION file.
8 17
     \item plotRegion did not respect the col/lty/lwd argument if given
9 18
     explicitely as opposed to through pData().  Reported by Karen
10 19
     Conneely <kconnee@emory.edu>.
... ...
@@ -17,6 +17,7 @@
17 17
 \alias{show,BSseq-method}
18 18
 \alias{getBSseq}
19 19
 \alias{collapseBSseq}
20
+\alias{strandCollapse}
20 21
 \alias{orderBSseq}
21 22
 \alias{chrSelectBSseq}
22 23
 \alias{hasBeenSmoothed}
... ...
@@ -128,6 +129,18 @@ slots for representing smoothed data. This class is an extension of
128 129
       methylation loci.  The input is a list, with each component an object
129 130
       of class \code{BSseq}.  The (slower) alternative is to use
130 131
       \code{Reduce(combine, list)}. }
132
+  
133
+    \item{\code{strandCollapse(BSseq, shift = TRUE)}}{
134
+
135
+      This function operates on a \code{BSseq} objects which has
136
+      stranded loci (ie. loci where the strand is one of \sQuote{+} pr
137
+      \sQuote{-}).  It will collapse the methylation and coverage
138
+      information across the two strands, into one position.  The
139
+      argument \code{shift} indicates whether the positions for the loci
140
+      on the reverse strand should be shifted one (ie. the positions for
141
+      these loci are the positions of the \sQuote{G} in the
142
+      \sQuote{CpG}; this is the case for Bismark output for example.}
143
+
131 144
   }
132 145
 }
133 146
 \section{Coercion}{
... ...
@@ -2,35 +2,18 @@
2 2
 %\VignetteDepends{bsseq}
3 3
 %\VignettePackage{bsseq}
4 4
 \documentclass[12pt]{article}
5
-<<options,echo=FALSE,results=hide>>=
6
-options(width=70)
7
-@ 
8
-\SweaveOpts{eps=FALSE,echo=TRUE}
9
-\usepackage{times}
10
-\usepackage{color,hyperref}
11
-\usepackage{fullpage}
12
-\usepackage[numbers]{natbib}
13
-\definecolor{darkblue}{rgb}{0.0,0.0,0.75}
14
-\hypersetup{colorlinks,breaklinks,
15
-            linkcolor=darkblue,urlcolor=darkblue,
16
-            anchorcolor=darkblue,citecolor=darkblue}
17
-
18
-\newcommand{\Rcode}[1]{{\texttt{#1}}}
19
-\newcommand{\Rpackage}[1]{{\texttt{#1}}}
20
-\newcommand{\software}[1]{\textsf{#1}}
21
-\newcommand{\R}{\software{R}}
5
+<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>=
6
+BiocStyle::latex()
7
+@
22 8
 \newcommand{\bold}[1]{\textbf{#1}} % Fix for R bibentry
23
-
24 9
 \title{The bsseq user's guide}
25
-\author{Kasper Daniel Hansen \\ \texttt{khansen@jhsph.edu}}
26
-\date{Modified: October 13, 2012.  Compiled: \today}
10
+\author{Kasper Daniel Hansen \\ \texttt{kasperdanielhansen@gmail.com}}
11
+\date{Modified: June 10, 2015.  Compiled: \today}
27 12
 \begin{document}
28
-\setlength{\parskip}{1\baselineskip}
29
-\setlength{\parindent}{0pt}
30 13
 
31 14
 \maketitle
32 15
 
33
-\section{Capabilities}
16
+\section*{Capabilities}
34 17
 
35 18
 The packages required for this vignette are
36 19
 
... ...
@@ -112,7 +95,7 @@ If you use this package, please cite our BSmooth paper:
112 95
 print(citation("bsseq"), style = "latex")
113 96
 @ 
114 97
 
115
-\section{Overview}
98
+\section*{Overview}
116 99
 
117 100
 The package assumes that the following data has been extracted from alignments:
118 101
 \begin{enumerate}
... ...
@@ -170,7 +153,7 @@ be of marginal interest to most users.  See the man page for \Rcode{binomialGood
170 153
 We also allow for easy computation of Fisher's exact test for each loci, using the function
171 154
 \Rcode{fisherTests}. 
172 155
 
173
-\section{Using objects of class BSseq}
156
+\section*{Using objects of class BSseq}
174 157
 
175 158
 \subsubsection*{Basic operations}
176 159
 
... ...
@@ -301,7 +284,7 @@ getMeth(BS.chr22, regions, type = "raw")
301 284
 getMeth(BS.chr22, regions[2], type = "raw", what = "perBase")
302 285
 @ 
303 286
 
304
-\section{Reading data}
287
+\section*{Reading data}
305 288
 
306 289
 \subsubsection*{Alignment output from the BSmooth alignment suite}
307 290
 
... ...
@@ -380,7 +363,6 @@ Fisher's exact tests may be efficiently computed using the function \Rcode{fishe
380 363
 Binomial and poisson goodness of fit tests statistics may be computed using
381 364
 \Rcode{binomialGoodnessOfFit} and \Rcode{poissonGoodnessOfFit}.
382 365
 
383
-\bibliographystyle{unsrturl}
384 366
 \bibliography{bsseq}
385 367
 
386 368
 
... ...
@@ -3,30 +3,13 @@
3 3
 %\VignetteDepends{bsseqData}
4 4
 %\VignettePackage{bsseq}
5 5
 \documentclass[12pt]{article}
6
-<<echo=FALSE,results=hide>>=
7
-options(width=70)
8
-@ 
9
-\SweaveOpts{eps=FALSE,echo=TRUE,eval=TRUE}
10
-\usepackage{times}
11
-\usepackage{color,hyperref}
12
-\usepackage{fullpage}
13
-\usepackage[numbers]{natbib}
14
-\usepackage{parskip}
15
-\definecolor{darkblue}{rgb}{0.0,0.0,0.75}
16
-\hypersetup{colorlinks,breaklinks,
17
-            linkcolor=darkblue,urlcolor=darkblue,
18
-            anchorcolor=darkblue,citecolor=darkblue}
19
-
20
-\newcommand{\Rcode}[1]{{\texttt{#1}}}
21
-\newcommand{\Rclass}[1]{{\texttt{"#1"}}}
22
-\newcommand{\Rpackage}[1]{{\texttt{#1}}}
23
-\newcommand{\software}[1]{\textsf{#1}}
24
-\newcommand{\R}{\software{R}}
6
+<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>=
7
+BiocStyle::latex()
8
+@
25 9
 \newcommand{\bold}[1]{\textbf{#1}} % Fix for R bibentry
26
-
27 10
 \title{Analyzing WGBS with the bsseq package}
28
-\author{Kasper Daniel Hansen\\ \texttt{khansen@jhsph.edu}}
29
-\date{Modified: October 13, 2012.  Compiled: \today}
11
+\author{Kasper Daniel Hansen\\ \texttt{kasperdanielhansen@gmail.com}}
12
+\date{Modified: June 10, 2015.  Compiled: \today}
30 13
 \begin{document}
31 14
 
32 15
 \maketitle
... ...
@@ -344,7 +327,6 @@ space and would add complications to the internal data structures.
344 327
 %% Backmatter
345 328
 %%
346 329
 
347
-\bibliographystyle{unsrturl}
348 330
 \bibliography{bsseq}
349 331
 
350 332
 \section*{SessionInfo}