git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@70488 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -193,6 +193,7 @@ setMethod("combine", signature(x = "BSseq", y = "BSseq"), function(x, y, ...) { |
193 | 193 |
stop("'x' and 'y' need to be smoothed on the same scale") |
194 | 194 |
phenoData <- combine(phenoData(x), phenoData(y)) |
195 | 195 |
if(identical(granges(x), granges(y))) { |
196 |
+ gr <- granges(x) |
|
196 | 197 |
M <- cbind(getBSseq(x, "M"), getBSseq(y, "M")) |
197 | 198 |
Cov <- cbind(getBSseq(x, "Cov"), getBSseq(y, "Cov")) |
198 | 199 |
if(!hasBeenSmoothed(x) || !hasBeenSmoothed(y)) { |
... | ... |
@@ -60,6 +60,8 @@ getMeth <- function(BSseq, regions = NULL, type = c("smooth", "raw"), |
60 | 60 |
} |
61 | 61 |
stopifnot(is(BSseq, "BSseq")) |
62 | 62 |
type <- match.arg(type) |
63 |
+ if(type == "smooth" & !hasBeenSmoothed(BSseq)) |
|
64 |
+ stop("'type=smooth' requires the object to have been smoothed.") |
|
63 | 65 |
what <- match.arg(what) |
64 | 66 |
z <- abs(qnorm((1 - alpha)/2, mean = 0, sd = 1)) |
65 | 67 |
if(is.null(regions) && type == "smooth") { |
... | ... |
@@ -336,7 +336,7 @@ read.umtab.chr <- function(files, sampleNames = NULL, |
336 | 336 |
read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt = FALSE, |
337 | 337 |
header = TRUE, verbose = TRUE) { |
338 | 338 |
dir <- normalizePath(dir) |
339 |
- inpattern <- "\\.cpg\\.tsv\\.gz$" |
|
339 |
+ inpattern <- "\\.cpg\\.tsv(|\\.gz)$" |
|
340 | 340 |
if(length(dir) != 1 || !file.info(dir)$isdir) |
341 | 341 |
stop("argument 'dir' needs to be a single directory") |
342 | 342 |
allChrFiles <- list.files(dir, pattern = inpattern, full.names = TRUE) |
... | ... |
@@ -347,8 +347,12 @@ read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt |
347 | 347 |
return(NULL) |
348 | 348 |
} |
349 | 349 |
if(header) { |
350 |
- columnHeaders <- sapply(allChrFiles, function(file) { |
|
351 |
- out <- readLines(con <- gzfile(file), n = 1) |
|
350 |
+ columnHeaders <- sapply(allChrFiles, function(thisfile) { |
|
351 |
+ if(grepl("\\.gz$", thisfile)) |
|
352 |
+ con <- gzfile(thisfile) |
|
353 |
+ else |
|
354 |
+ con <- file(thisfile, open = "r") |
|
355 |
+ out <- readLines(con, n = 1) |
|
352 | 356 |
close(con) |
353 | 357 |
out |
354 | 358 |
}) |
... | ... |
@@ -367,10 +371,14 @@ read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt |
367 | 371 |
what0[c("Mcy", "Ucy")] <- replicate(2, NULL) |
368 | 372 |
if(!keepFilt) |
369 | 373 |
what0[grep("^filt", names(what0))] <- replicate(length(grep("^filt", names(what0))), NULL) |
370 |
- outList <- lapply(allChrFiles, function(file) { |
|
374 |
+ outList <- lapply(allChrFiles, function(thisfile) { |
|
371 | 375 |
if(verbose) |
372 |
- cat(sprintf("Reading '%s'\n", file)) |
|
373 |
- out <- scan(con <- gzfile(file, open = "r"), skip = header, what = what0, sep = "\t", |
|
376 |
+ cat(sprintf("Reading '%s'\n", thisfile)) |
|
377 |
+ if(grepl("\\.gz$", thisfile)) |
|
378 |
+ con <- gzfile(thisfile) |
|
379 |
+ else |
|
380 |
+ con <- file(thisfile) |
|
381 |
+ out <- scan(con, skip = header, what = what0, sep = "\t", |
|
374 | 382 |
quote = "", na.strings = "NA", quiet = TRUE) |
375 | 383 |
close(con) |
376 | 384 |
out |
... | ... |
@@ -6,10 +6,11 @@ bibentry("Article", |
6 | 6 |
person(c("Rafael", "A."), "Irizarry")), |
7 | 7 |
journal = "Genome Biology", |
8 | 8 |
year = 2012, |
9 |
- note = "In press", |
|
10 |
- textVersion = |
|
11 |
- paste("KD Hansen, B Langmead, and RA Irizarry", |
|
12 |
- "BSmooth: from whole genome bisulfite sequencing reads to differentially methylated regions", |
|
13 |
-"Genome Biology, In press (2012)") |
|
9 |
+ volume = "13", |
|
10 |
+ pages = "R83", |
|
11 |
+ textVersion = |
|
12 |
+ paste("KD Hansen, B Langmead, and RA Irizarry.", |
|
13 |
+ "BSmooth: from whole genome bisulfite sequencing reads to differentially methylated regions.", |
|
14 |
+"Genome Biology, 13:R83 (2012)") |
|
14 | 15 |
) |
15 | 16 |
|
... | ... |
@@ -2,6 +2,20 @@ |
2 | 2 |
\title{bsseq news} |
3 | 3 |
\encoding{UTF-8} |
4 | 4 |
|
5 |
+\section{Version 0.7}{ |
|
6 |
+ \itemize{ |
|
7 |
+ \item Fixed a bug in getMeth, where type="raw" resulted in an error |
|
8 |
+ for non-smoothed data objects. |
|
9 |
+ \item Updated CITATION and citations in the vignettes. |
|
10 |
+ \item Now read.bsmooth supports both gzipped and non-gzipped files, |
|
11 |
+ whereas previously it assumed the output files to be gzipped. |
|
12 |
+ Thanks to Andreas Schoenegger for reporting this problem. |
|
13 |
+ \item Fixed a bug in combine() that also resulted in a bug in |
|
14 |
+ read.bsmooth when multiple input directories were specified. Bug |
|
15 |
+ reported by Andreas Schoenegger. |
|
16 |
+ } |
|
17 |
+} |
|
18 |
+ |
|
5 | 19 |
\section{Version 0.4}{ |
6 | 20 |
\itemize{ |
7 | 21 |
\item Improved combine and fixed a bug. Also added a non-exported |
... | ... |
@@ -33,6 +33,10 @@ read.bsmooth(dirs, sampleNames = NULL, seqnames = NULL, |
33 | 33 |
\item{verbose}{Make the function verbose.} |
34 | 34 |
} |
35 | 35 |
\note{ |
36 |
+ Input files can either be gzipped or not. Gzipping the input files |
|
37 |
+ results in much greater speed of reading (and saves space), so it is |
|
38 |
+ recommended. |
|
39 |
+ |
|
36 | 40 |
We are working on making this function faster and less memory hungry. |
37 | 41 |
} |
38 | 42 |
\value{ |
... | ... |
@@ -19,10 +19,11 @@ options(width=70) |
19 | 19 |
\newcommand{\Rpackage}[1]{{\texttt{#1}}} |
20 | 20 |
\newcommand{\software}[1]{\textsf{#1}} |
21 | 21 |
\newcommand{\R}{\software{R}} |
22 |
+\newcommand{\bold}[1]{\textbf{#1}} |
|
22 | 23 |
|
23 | 24 |
\title{The bsseq user's guide} |
24 | 25 |
\author{Kasper Daniel Hansen \\ \texttt{khansen@jhsph.edu}} |
25 |
-\date{Modified: June 20, 2011. Compiled: \today} |
|
26 |
+\date{Modified: October 13, 2012. Compiled: \today} |
|
26 | 27 |
\begin{document} |
27 | 28 |
\setlength{\parskip}{1\baselineskip} |
28 | 29 |
\setlength{\parindent}{0pt} |
... | ... |
@@ -103,6 +104,14 @@ methylation (methylation that is higher in one condition/sample than in another) |
103 | 104 |
(methylation that is lower in one condition/sample than in another), and finally differentially |
104 | 105 |
methylated position (DMP) referring to a single loci. |
105 | 106 |
|
107 |
+\subsubsection*{Citation} |
|
108 |
+ |
|
109 |
+If you use this package, please cite our BSmooth paper: |
|
110 |
+ |
|
111 |
+<<citation,echo=FALSE,results=tex>>= |
|
112 |
+print(citation("bsseq"), style = "latex") |
|
113 |
+@ |
|
114 |
+ |
|
106 | 115 |
\section{Overview} |
107 | 116 |
|
108 | 117 |
The package assumes that the following data has been extracted from alignments: |
... | ... |
@@ -288,12 +297,12 @@ computed for region-level summary estimates. Examples |
288 | 297 |
|
289 | 298 |
<<getMeth>>= |
290 | 299 |
getMeth(BS.chr22, regions, type = "raw") |
291 |
-getMeth(BS.chr22, regions[2], type = "smooth", what = "perBase") |
|
300 |
+getMeth(BS.chr22, regions[2], type = "raw", what = "perBase") |
|
292 | 301 |
@ |
293 | 302 |
|
294 | 303 |
\section{Reading data} |
295 | 304 |
|
296 |
-\subsubsection{Alignment output from the BSmooth alignment suite} |
|
305 |
+\subsubsection*{Alignment output from the BSmooth alignment suite} |
|
297 | 306 |
|
298 | 307 |
It is straightforward to read output files (summarized evidence) from the BSmooth alignment suite, |
299 | 308 |
using \Rcode{read.bsmooth}. This function takes as argument a number of directories, usually |
... | ... |
@@ -301,12 +310,13 @@ corresponding to samples, with the alignment output. Sometimes, one might want |
301 | 310 |
chromosomes, which can be accomplished by the \Rcode{seqnames} argument. Also, the default values |
302 | 311 |
of \Rcode{qualityCutoff = 20} ensures that we do not use methylation evidence with a base quality |
303 | 312 |
strictly lower than 20 (since we may not be confident whether the read really supports methylation |
304 |
-or not). |
|
313 |
+or not). The function \Rcode{read.bsmooth} allows for both gzipped and non-gzipped input files. It |
|
314 |
+is faster to read gzipped files, so we recommend gzipping post-alignment. |
|
305 | 315 |
|
306 | 316 |
During the development of BSmooth we experimented with a number of different output formats. These |
307 | 317 |
legacy formats can be read with \Rcode{read.umtab} and \Rcode{read.umtab2}. |
308 | 318 |
|
309 |
-\subsubsection{Alignment output from other aligners} |
|
319 |
+\subsubsection*{Alignment output from other aligners} |
|
310 | 320 |
|
311 | 321 |
We are interested in adding additional parsers to the package; if your favorite alignment software |
312 | 322 |
is not supported, feel free to get in touch. |
... | ... |
@@ -339,7 +349,7 @@ file may be found by |
339 | 349 |
system.file("scripts", "get_BS.chr22.R", package = "bsseq") |
340 | 350 |
@ |
341 | 351 |
|
342 |
-\section{Analysis} |
|
352 |
+\section*{Analysis} |
|
343 | 353 |
|
344 | 354 |
Computing smoothed methylation levels is simple. We subset \Rcode{BS.chr22} so we only smooth 1 |
345 | 355 |
sample, for run-time purpose |
... | ... |
@@ -25,7 +25,7 @@ options(width=70) |
25 | 25 |
|
26 | 26 |
\title{Analyzing WGBS with the bsseq package} |
27 | 27 |
\author{Kasper Daniel Hansen\\ \texttt{khansen@jhsph.edu}} |
28 |
-\date{Modified: June 26, 2011. Compiled: \today} |
|
28 |
+\date{Modified: October 13, 2012. Compiled: \today} |
|
29 | 29 |
\begin{document} |
30 | 30 |
|
31 | 31 |
\maketitle |
... | ... |
@@ -62,6 +62,11 @@ BS.cancer.ex |
62 | 62 |
pData(BS.cancer.ex) |
63 | 63 |
@ |
64 | 64 |
|
65 |
+If you use this package, please cite our BSmooth paper |
|
66 |
+<<citation,echo=FALSE,results=tex>>= |
|
67 |
+print(citation("bsseq"), style = "latex") |
|
68 |
+@ |
|
69 |
+ |
|
65 | 70 |
\section*{Smoothing} |
66 | 71 |
|
67 | 72 |
The first step of the analysis is to smooth the data |