Commit id: 3e3c4a483adc9ba1e5d101a35f9a0fef41d2b78b
updated vignettes to use BiocStyle
Commit id: fe749c1df8a2a39ca6c4e1cc3b2358195f307631
updating read.bismark
Commit id: c50eb68c8e7f1be51af4ff411c95064849b52d15
Fixing Description; updating bsseq.Rnw to use BiocStyle
Commit id: f38a77394f0248aae1a6a4e27102610a53ca892e
resaved with compress=xz
Commit id: 2dcedd4fead08fe0af2a0afb7de2c89f4818b962
added Raw parsing of cytosine report files from Bismark
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@104827 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,14 +1,14 @@ |
1 | 1 |
Package: bsseq |
2 |
-Version: 1.5.2 |
|
3 |
-Title: Analyze, manage and store bisulfite sequencing data. |
|
4 |
-Description: Tools for analyzing and visualizing bisulfite sequencing data |
|
2 |
+Version: 1.5.3 |
|
3 |
+Title: Analyze, manage and store bisulfite sequencing data |
|
4 |
+Description: A collection of tools for analyzing and visualizing bisulfite sequencing data. |
|
5 | 5 |
Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("aut", "cre"), |
6 | 6 |
email = "kasperdanielhansen@gmail.com")) |
7 |
-Depends: R (>= 2.15), methods, BiocGenerics, S4Vectors, IRanges (>= 2.1.10), |
|
7 |
+Depends: R (>= 2.15), methods, BiocGenerics, IRanges (>= 2.1.10), |
|
8 | 8 |
GenomicRanges (>= 1.19.6), SummarizedExperiment (>= 0.1.1), parallel, |
9 | 9 |
matrixStats, GenomeInfoDb |
10 |
-Imports: scales, stats, graphics, Biobase, locfit, gtools |
|
11 |
-Suggests: RUnit, bsseqData |
|
10 |
+Imports: scales, stats, graphics, Biobase, locfit, gtools, data.table, S4Vectors |
|
11 |
+Suggests: RUnit, bsseqData, BiocStyle |
|
12 | 12 |
Collate: hasGRanges.R BSseq_class.R BSseqTstat_class.R BSseq_utils.R combine.R |
13 | 13 |
utils.R read.bsmooth.R read.bismark.R BSmooth.R BSmooth.tstat.R dmrFinder.R |
14 | 14 |
gof_stats.R plotting.R fisher.R permutations.R |
... | ... |
@@ -29,6 +29,7 @@ importMethodsFrom(GenomeInfoDb, "seqlengths", "seqlengths<-", "seqinfo", "seqinf |
29 | 29 |
"seqnames", "seqnames<-", "seqlevels", "seqlevels<-") |
30 | 30 |
import(S4Vectors) |
31 | 31 |
importFrom(gtools, "combinations") |
32 |
+importFrom(data.table, "fread", "setnames") |
|
32 | 33 |
|
33 | 34 |
## |
34 | 35 |
## Exporting |
... | ... |
@@ -58,7 +59,7 @@ exportMethods("[", "show", |
58 | 59 |
export("BSseq", "getMeth", "getCoverage", "getBSseq", "getStats", |
59 | 60 |
"collapseBSseq", "orderBSseq", "hasBeenSmoothed", "chrSelectBSseq", |
60 | 61 |
"BSmooth", "BSmooth.tstat", "dmrFinder", "fisherTests", |
61 |
- "combineList", |
|
62 |
+ "combineList", "strandCollapse", |
|
62 | 63 |
"plotRegion", "plotManyRegions", |
63 | 64 |
"read.umtab", "read.umtab2", "read.bsmooth", "read.bismark", |
64 | 65 |
"poissonGoodnessOfFit", "binomialGoodnessOfFit", |
... | ... |
@@ -228,8 +228,24 @@ setMethod("updateObject", "BSseq", |
228 | 228 |
}) |
229 | 229 |
|
230 | 230 |
|
231 |
- |
|
232 |
- |
|
231 |
+strandCollapse <- function(BSseq, shift = TRUE) { |
|
232 |
+ if(all(runValue(strand(BSseq)) == "*")) { |
|
233 |
+ warning("All loci are unstranded; nothing to collapse") |
|
234 |
+ return(BSseq) |
|
235 |
+ } |
|
236 |
+ if(!(all(runValue(strand(BSseq)) %in% c("+", "-")))) |
|
237 |
+ stop("'BSseq' object has a mix of stranded and unstranded loci.") |
|
238 |
+ BS.forward <- BSseq[strand(BSseq) == "+"] |
|
239 |
+ strand(BS.forward) <- "*" |
|
240 |
+ BS.reverse <- BSseq[strand(BSseq) == "-"] |
|
241 |
+ strand(BS.reverse) <- "*" |
|
242 |
+ if(shift) |
|
243 |
+ rowRanges(BS.reverse) <- shift(granges(BS.reverse), -1L) |
|
244 |
+ sampleNames(BS.reverse) <- paste0(sampleNames(BS.reverse), "_REVERSE") |
|
245 |
+ BS.comb <- combine(BS.forward, BS.reverse) |
|
246 |
+ newBSseq <- collapseBSseq(BS.comb, columns = rep(sampleNames(BSseq), 2)) |
|
247 |
+ newBSseq |
|
248 |
+} |
|
233 | 249 |
|
234 | 250 |
## getD <- function(data, sample1, sample2, type = c("raw", "fit"), |
235 | 251 |
## addPositions = FALSE, alpha = 0.95) { |
... | ... |
@@ -41,7 +41,8 @@ getNullDmrs <- function(BSseq, idxMatrix1, idxMatrix2, estimate.var = "same", |
41 | 41 |
subsetDmrs <- function(xx) { |
42 | 42 |
if(is.null(xx) || is(xx, "try-error")) |
43 | 43 |
return(NULL) |
44 |
- out <- subset(xx, n >= 3 & abs(meanDiff) > 0.1 & invdensity <= 300) |
|
44 |
+ out <- xx[ xx[,"n"] >= 3 & abs(xx[, "meanDiff"]) > 0.1 & |
|
45 |
+ xx[, "invdensity"] <= 300, ] |
|
45 | 46 |
if(nrow(out) == 0) |
46 | 47 |
return(NULL) |
47 | 48 |
out |
... | ... |
@@ -1,4 +1,5 @@ |
1 |
-read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, verbose = TRUE){ |
|
1 |
+read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, strandCollapse = TRUE, |
|
2 |
+ fileType = c("cytosineReport", "oldBedGraph"), verbose = TRUE){ |
|
2 | 3 |
## Argument checking |
3 | 4 |
if (anyDuplicated(files)){ |
4 | 5 |
stop("duplicate entries in 'files'") |
... | ... |
@@ -6,6 +7,7 @@ read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, verbose = TRUE){ |
6 | 7 |
if (length(sampleNames) != length(files) | anyDuplicated(sampleNames)){ |
7 | 8 |
stop("argument 'sampleNames' has to have the same length as argument 'files', without duplicate entries") |
8 | 9 |
} |
10 |
+ fileType <- match.arg(fileType) |
|
9 | 11 |
## Process each file |
10 | 12 |
idxes <- seq_along(files) |
11 | 13 |
names(idxes) <- sampleNames |
... | ... |
@@ -14,12 +16,19 @@ read.bismark <- function(files, sampleNames, rmZeroCov = FALSE, verbose = TRUE){ |
14 | 16 |
cat(sprintf("[read.bismark] Reading file '%s' ... ", files[ii])) |
15 | 17 |
} |
16 | 18 |
ptime1 <- proc.time() |
17 |
- raw <- read.bismarkFileRaw(thisfile = files[ii]) |
|
19 |
+ if(fileType == "oldBedGraph") { |
|
20 |
+ raw <- read.bismarkFileRaw(thisfile = files[ii]) |
|
21 |
+ } |
|
22 |
+ if(fileType == "cytosineReport") { |
|
23 |
+ raw <- read.bismarkCytosineRaw(thisfile = files[ii], keepContext = FALSE) |
|
24 |
+ } |
|
18 | 25 |
M <- matrix(mcols(raw)[, "mCount"], ncol = 1) |
19 | 26 |
Cov <- M + mcols(raw)[, "uCount"] |
20 | 27 |
mcols(raw) <- NULL |
21 | 28 |
out <- BSseq(gr = raw, M = M, Cov = Cov, |
22 | 29 |
sampleNames = sampleNames[ii], rmZeroCov = rmZeroCov) |
30 |
+ if(strandCollapse && !all(runValue(strand(out)) == "*")) |
|
31 |
+ out <- strandCollapse(out) |
|
23 | 32 |
ptime2 <- proc.time() |
24 | 33 |
stime <- (ptime2 - ptime1)[3] |
25 | 34 |
if (verbose) { |
... | ... |
@@ -64,3 +73,26 @@ read.bismarkFileRaw <- function(thisfile, verbose = TRUE){ |
64 | 73 |
mcols(gr) <- df |
65 | 74 |
gr |
66 | 75 |
} |
76 |
+ |
|
77 |
+read.bismarkCytosineRaw <- function(thisfile, keepContext = FALSE) { |
|
78 |
+ out <- fread(thisfile) |
|
79 |
+ if(length(out) != 6 && length(out) != 7) |
|
80 |
+ stop("unknown file format") |
|
81 |
+ if(length(out) == 6) { |
|
82 |
+ setnames(out, c("chr", "start", "end", "methPerc", "mCount", "uCount")) |
|
83 |
+ gr <- GRanges(seqnames = out[["chr"]], |
|
84 |
+ ranges = IRanges(start = out[["start"]], width = 1), |
|
85 |
+ mCount = out[["mCount"]], uCount = out[["uCount"]]) |
|
86 |
+ } |
|
87 |
+ if(length(out) == 7) { |
|
88 |
+ setnames(out, c("chr", "start", "strand", "mCount", "uCount", "type", "context")) |
|
89 |
+ gr <- GRanges(seqnames = out[["chr"]], strand = out[["strand"]], |
|
90 |
+ ranges = IRanges(start = out[["start"]], width = 1), |
|
91 |
+ mCount = out[["mCount"]], uCount = out[["uCount"]]) |
|
92 |
+ if(keepContext) { |
|
93 |
+ gr$type <- out[["type"]] |
|
94 |
+ gr$context <- out[["context"]] |
|
95 |
+ } |
|
96 |
+ } |
|
97 |
+ gr |
|
98 |
+} |
... | ... |
@@ -2,9 +2,18 @@ |
2 | 2 |
\title{bsseq news} |
3 | 3 |
\encoding{UTF-8} |
4 | 4 |
|
5 |
+\section{Version 1.5.x}{ |
|
6 |
+ \itemize{ |
|
7 |
+ \item new function strandCollapse for collapsing forward and reverse |
|
8 |
+ strand data to be unstranded. |
|
9 |
+ \item Updated read.bismark to support the cytosine report files; |
|
10 |
+ both formats are support. |
|
11 |
+ } |
|
12 |
+} |
|
13 |
+ |
|
5 | 14 |
\section{Version 0.11.x}{ |
6 | 15 |
\itemize{ |
7 |
- \item Converted to using Authors@R in the DESCRIPTIOn file. |
|
16 |
+ \item Converted to using Authors@R in the DESCRIPTION file. |
|
8 | 17 |
\item plotRegion did not respect the col/lty/lwd argument if given |
9 | 18 |
explicitely as opposed to through pData(). Reported by Karen |
10 | 19 |
Conneely <kconnee@emory.edu>. |
... | ... |
@@ -17,6 +17,7 @@ |
17 | 17 |
\alias{show,BSseq-method} |
18 | 18 |
\alias{getBSseq} |
19 | 19 |
\alias{collapseBSseq} |
20 |
+\alias{strandCollapse} |
|
20 | 21 |
\alias{orderBSseq} |
21 | 22 |
\alias{chrSelectBSseq} |
22 | 23 |
\alias{hasBeenSmoothed} |
... | ... |
@@ -128,6 +129,18 @@ slots for representing smoothed data. This class is an extension of |
128 | 129 |
methylation loci. The input is a list, with each component an object |
129 | 130 |
of class \code{BSseq}. The (slower) alternative is to use |
130 | 131 |
\code{Reduce(combine, list)}. } |
132 |
+ |
|
133 |
+ \item{\code{strandCollapse(BSseq, shift = TRUE)}}{ |
|
134 |
+ |
|
135 |
+ This function operates on a \code{BSseq} objects which has |
|
136 |
+ stranded loci (ie. loci where the strand is one of \sQuote{+} pr |
|
137 |
+ \sQuote{-}). It will collapse the methylation and coverage |
|
138 |
+ information across the two strands, into one position. The |
|
139 |
+ argument \code{shift} indicates whether the positions for the loci |
|
140 |
+ on the reverse strand should be shifted one (ie. the positions for |
|
141 |
+ these loci are the positions of the \sQuote{G} in the |
|
142 |
+ \sQuote{CpG}; this is the case for Bismark output for example.} |
|
143 |
+ |
|
131 | 144 |
} |
132 | 145 |
} |
133 | 146 |
\section{Coercion}{ |
... | ... |
@@ -2,35 +2,18 @@ |
2 | 2 |
%\VignetteDepends{bsseq} |
3 | 3 |
%\VignettePackage{bsseq} |
4 | 4 |
\documentclass[12pt]{article} |
5 |
-<<options,echo=FALSE,results=hide>>= |
|
6 |
-options(width=70) |
|
7 |
-@ |
|
8 |
-\SweaveOpts{eps=FALSE,echo=TRUE} |
|
9 |
-\usepackage{times} |
|
10 |
-\usepackage{color,hyperref} |
|
11 |
-\usepackage{fullpage} |
|
12 |
-\usepackage[numbers]{natbib} |
|
13 |
-\definecolor{darkblue}{rgb}{0.0,0.0,0.75} |
|
14 |
-\hypersetup{colorlinks,breaklinks, |
|
15 |
- linkcolor=darkblue,urlcolor=darkblue, |
|
16 |
- anchorcolor=darkblue,citecolor=darkblue} |
|
17 |
- |
|
18 |
-\newcommand{\Rcode}[1]{{\texttt{#1}}} |
|
19 |
-\newcommand{\Rpackage}[1]{{\texttt{#1}}} |
|
20 |
-\newcommand{\software}[1]{\textsf{#1}} |
|
21 |
-\newcommand{\R}{\software{R}} |
|
5 |
+<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>= |
|
6 |
+BiocStyle::latex() |
|
7 |
+@ |
|
22 | 8 |
\newcommand{\bold}[1]{\textbf{#1}} % Fix for R bibentry |
23 |
- |
|
24 | 9 |
\title{The bsseq user's guide} |
25 |
-\author{Kasper Daniel Hansen \\ \texttt{khansen@jhsph.edu}} |
|
26 |
-\date{Modified: October 13, 2012. Compiled: \today} |
|
10 |
+\author{Kasper Daniel Hansen \\ \texttt{kasperdanielhansen@gmail.com}} |
|
11 |
+\date{Modified: June 10, 2015. Compiled: \today} |
|
27 | 12 |
\begin{document} |
28 |
-\setlength{\parskip}{1\baselineskip} |
|
29 |
-\setlength{\parindent}{0pt} |
|
30 | 13 |
|
31 | 14 |
\maketitle |
32 | 15 |
|
33 |
-\section{Capabilities} |
|
16 |
+\section*{Capabilities} |
|
34 | 17 |
|
35 | 18 |
The packages required for this vignette are |
36 | 19 |
|
... | ... |
@@ -112,7 +95,7 @@ If you use this package, please cite our BSmooth paper: |
112 | 95 |
print(citation("bsseq"), style = "latex") |
113 | 96 |
@ |
114 | 97 |
|
115 |
-\section{Overview} |
|
98 |
+\section*{Overview} |
|
116 | 99 |
|
117 | 100 |
The package assumes that the following data has been extracted from alignments: |
118 | 101 |
\begin{enumerate} |
... | ... |
@@ -170,7 +153,7 @@ be of marginal interest to most users. See the man page for \Rcode{binomialGood |
170 | 153 |
We also allow for easy computation of Fisher's exact test for each loci, using the function |
171 | 154 |
\Rcode{fisherTests}. |
172 | 155 |
|
173 |
-\section{Using objects of class BSseq} |
|
156 |
+\section*{Using objects of class BSseq} |
|
174 | 157 |
|
175 | 158 |
\subsubsection*{Basic operations} |
176 | 159 |
|
... | ... |
@@ -301,7 +284,7 @@ getMeth(BS.chr22, regions, type = "raw") |
301 | 284 |
getMeth(BS.chr22, regions[2], type = "raw", what = "perBase") |
302 | 285 |
@ |
303 | 286 |
|
304 |
-\section{Reading data} |
|
287 |
+\section*{Reading data} |
|
305 | 288 |
|
306 | 289 |
\subsubsection*{Alignment output from the BSmooth alignment suite} |
307 | 290 |
|
... | ... |
@@ -380,7 +363,6 @@ Fisher's exact tests may be efficiently computed using the function \Rcode{fishe |
380 | 363 |
Binomial and poisson goodness of fit tests statistics may be computed using |
381 | 364 |
\Rcode{binomialGoodnessOfFit} and \Rcode{poissonGoodnessOfFit}. |
382 | 365 |
|
383 |
-\bibliographystyle{unsrturl} |
|
384 | 366 |
\bibliography{bsseq} |
385 | 367 |
|
386 | 368 |
|
... | ... |
@@ -3,30 +3,13 @@ |
3 | 3 |
%\VignetteDepends{bsseqData} |
4 | 4 |
%\VignettePackage{bsseq} |
5 | 5 |
\documentclass[12pt]{article} |
6 |
-<<echo=FALSE,results=hide>>= |
|
7 |
-options(width=70) |
|
8 |
-@ |
|
9 |
-\SweaveOpts{eps=FALSE,echo=TRUE,eval=TRUE} |
|
10 |
-\usepackage{times} |
|
11 |
-\usepackage{color,hyperref} |
|
12 |
-\usepackage{fullpage} |
|
13 |
-\usepackage[numbers]{natbib} |
|
14 |
-\usepackage{parskip} |
|
15 |
-\definecolor{darkblue}{rgb}{0.0,0.0,0.75} |
|
16 |
-\hypersetup{colorlinks,breaklinks, |
|
17 |
- linkcolor=darkblue,urlcolor=darkblue, |
|
18 |
- anchorcolor=darkblue,citecolor=darkblue} |
|
19 |
- |
|
20 |
-\newcommand{\Rcode}[1]{{\texttt{#1}}} |
|
21 |
-\newcommand{\Rclass}[1]{{\texttt{"#1"}}} |
|
22 |
-\newcommand{\Rpackage}[1]{{\texttt{#1}}} |
|
23 |
-\newcommand{\software}[1]{\textsf{#1}} |
|
24 |
-\newcommand{\R}{\software{R}} |
|
6 |
+<<style-Sweave, eval=TRUE, echo=FALSE, results=tex>>= |
|
7 |
+BiocStyle::latex() |
|
8 |
+@ |
|
25 | 9 |
\newcommand{\bold}[1]{\textbf{#1}} % Fix for R bibentry |
26 |
- |
|
27 | 10 |
\title{Analyzing WGBS with the bsseq package} |
28 |
-\author{Kasper Daniel Hansen\\ \texttt{khansen@jhsph.edu}} |
|
29 |
-\date{Modified: October 13, 2012. Compiled: \today} |
|
11 |
+\author{Kasper Daniel Hansen\\ \texttt{kasperdanielhansen@gmail.com}} |
|
12 |
+\date{Modified: June 10, 2015. Compiled: \today} |
|
30 | 13 |
\begin{document} |
31 | 14 |
|
32 | 15 |
\maketitle |
... | ... |
@@ -344,7 +327,6 @@ space and would add complications to the internal data structures. |
344 | 327 |
%% Backmatter |
345 | 328 |
%% |
346 | 329 |
|
347 |
-\bibliographystyle{unsrturl} |
|
348 | 330 |
\bibliography{bsseq} |
349 | 331 |
|
350 | 332 |
\section*{SessionInfo} |