Browse code

fixed two bugs reported by Andreas Schoenegger, updated citation information

git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/bsseq@70488 bc3139a8-67e5-0310-9ffc-ced21a209358

khansen authored on 13/10/2012 16:48:16
Showing 10 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: bsseq
2
-Version: 0.7.0
2
+Version: 0.7.1
3 3
 Title: Analyze, manage and store bisulfite sequencing data
4 4
 Description: Tools for analyzing and visualizing bisulfite sequencing data
5 5
 Author: Kasper Daniel Hansen
... ...
@@ -193,6 +193,7 @@ setMethod("combine", signature(x = "BSseq", y = "BSseq"), function(x, y, ...) {
193 193
         stop("'x' and 'y' need to be smoothed on the same scale")
194 194
     phenoData <- combine(phenoData(x), phenoData(y))
195 195
     if(identical(granges(x), granges(y))) {
196
+        gr <- granges(x)
196 197
         M <- cbind(getBSseq(x, "M"), getBSseq(y, "M"))
197 198
         Cov <- cbind(getBSseq(x, "Cov"), getBSseq(y, "Cov"))
198 199
         if(!hasBeenSmoothed(x) || !hasBeenSmoothed(y)) {
... ...
@@ -60,6 +60,8 @@ getMeth <- function(BSseq, regions = NULL, type = c("smooth", "raw"),
60 60
     }
61 61
     stopifnot(is(BSseq, "BSseq"))
62 62
     type <- match.arg(type)
63
+    if(type == "smooth" & !hasBeenSmoothed(BSseq))
64
+        stop("'type=smooth' requires the object to have been smoothed.")
63 65
     what <- match.arg(what)
64 66
     z <- abs(qnorm((1 - alpha)/2, mean = 0, sd = 1))
65 67
     if(is.null(regions) && type == "smooth") {
... ...
@@ -336,7 +336,7 @@ read.umtab.chr <- function(files, sampleNames = NULL,
336 336
 read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt = FALSE,
337 337
                                header = TRUE, verbose = TRUE) {
338 338
     dir <- normalizePath(dir)
339
-    inpattern <- "\\.cpg\\.tsv\\.gz$"
339
+    inpattern <- "\\.cpg\\.tsv(|\\.gz)$"
340 340
     if(length(dir) != 1 || !file.info(dir)$isdir)
341 341
         stop("argument 'dir' needs to be a single directory")
342 342
     allChrFiles <- list.files(dir, pattern = inpattern, full.names = TRUE)
... ...
@@ -347,8 +347,12 @@ read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt
347 347
         return(NULL)
348 348
     }
349 349
     if(header) {
350
-        columnHeaders <- sapply(allChrFiles, function(file) {
351
-            out <- readLines(con <- gzfile(file), n = 1)
350
+        columnHeaders <- sapply(allChrFiles, function(thisfile) {
351
+            if(grepl("\\.gz$", thisfile))
352
+                con <- gzfile(thisfile)
353
+            else
354
+                con <- file(thisfile, open = "r")
355
+            out <- readLines(con, n = 1)
352 356
             close(con)
353 357
             out
354 358
         })
... ...
@@ -367,10 +371,14 @@ read.bsmoothDirRaw <- function(dir, seqnames = NULL, keepCycle = FALSE, keepFilt
367 371
         what0[c("Mcy", "Ucy")] <- replicate(2, NULL)
368 372
     if(!keepFilt) 
369 373
         what0[grep("^filt", names(what0))] <- replicate(length(grep("^filt", names(what0))), NULL)
370
-    outList <- lapply(allChrFiles, function(file) {
374
+    outList <- lapply(allChrFiles, function(thisfile) {
371 375
         if(verbose)
372
-            cat(sprintf("Reading '%s'\n", file))
373
-        out <- scan(con <- gzfile(file, open = "r"), skip = header, what = what0, sep = "\t",
376
+            cat(sprintf("Reading '%s'\n", thisfile))
377
+        if(grepl("\\.gz$", thisfile))
378
+            con <- gzfile(thisfile)
379
+        else
380
+            con <- file(thisfile)
381
+        out <- scan(con, skip = header, what = what0, sep = "\t",
374 382
                     quote = "", na.strings = "NA", quiet = TRUE)
375 383
         close(con)
376 384
         out
... ...
@@ -6,10 +6,11 @@ bibentry("Article",
6 6
          person(c("Rafael", "A."), "Irizarry")),
7 7
          journal = "Genome Biology",
8 8
          year = 2012,
9
-	 note = "In press",  
10
-         textVersion = 
11
-         paste("KD Hansen, B Langmead, and RA Irizarry",
12
-               "BSmooth: from whole genome bisulfite sequencing reads to differentially methylated regions", 
13
-"Genome Biology, In press (2012)")
9
+	 volume = "13",
10
+	 pages = "R83",
11
+  	 textVersion = 
12
+         paste("KD Hansen, B Langmead, and RA Irizarry.",
13
+               "BSmooth: from whole genome bisulfite sequencing reads to differentially methylated regions.", 
14
+"Genome Biology, 13:R83 (2012)")
14 15
 )
15 16
 
... ...
@@ -2,6 +2,20 @@
2 2
 \title{bsseq news}
3 3
 \encoding{UTF-8}
4 4
 
5
+\section{Version 0.7}{
6
+  \itemize{
7
+    \item Fixed a bug in getMeth, where type="raw" resulted in an error
8
+    for non-smoothed data objects.
9
+    \item Updated CITATION and citations in the vignettes.
10
+    \item Now read.bsmooth supports both gzipped and non-gzipped files,
11
+    whereas previously it assumed the output files to be gzipped.
12
+    Thanks to Andreas Schoenegger for reporting this problem.
13
+    \item Fixed a bug in combine() that also resulted in a bug in
14
+    read.bsmooth when multiple input directories were specified.  Bug
15
+    reported by Andreas Schoenegger.
16
+  }
17
+}
18
+
5 19
 \section{Version 0.4}{
6 20
   \itemize{
7 21
     \item Improved combine and fixed a bug.  Also added a non-exported
... ...
@@ -33,6 +33,10 @@ read.bsmooth(dirs, sampleNames = NULL, seqnames = NULL,
33 33
   \item{verbose}{Make the function verbose.}
34 34
 }
35 35
 \note{
36
+  Input files can either be gzipped or not.  Gzipping the input files
37
+  results in much greater speed of reading (and saves space), so it is
38
+  recommended.
39
+  
36 40
   We are working on making this function faster and less memory hungry.
37 41
 }
38 42
 \value{
... ...
@@ -19,10 +19,11 @@ options(width=70)
19 19
 \newcommand{\Rpackage}[1]{{\texttt{#1}}}
20 20
 \newcommand{\software}[1]{\textsf{#1}}
21 21
 \newcommand{\R}{\software{R}}
22
+\newcommand{\bold}[1]{\textbf{#1}}
22 23
 
23 24
 \title{The bsseq user's guide}
24 25
 \author{Kasper Daniel Hansen \\ \texttt{khansen@jhsph.edu}}
25
-\date{Modified: June 20, 2011.  Compiled: \today}
26
+\date{Modified: October 13, 2012.  Compiled: \today}
26 27
 \begin{document}
27 28
 \setlength{\parskip}{1\baselineskip}
28 29
 \setlength{\parindent}{0pt}
... ...
@@ -103,6 +104,14 @@ methylation (methylation that is higher in one condition/sample than in another)
103 104
 (methylation that is lower in one condition/sample than in another), and finally differentially
104 105
 methylated position (DMP) referring to a single loci.
105 106
 
107
+\subsubsection*{Citation}
108
+
109
+If you use this package, please cite our BSmooth paper:
110
+
111
+<<citation,echo=FALSE,results=tex>>=
112
+print(citation("bsseq"), style = "latex")
113
+@ 
114
+
106 115
 \section{Overview}
107 116
 
108 117
 The package assumes that the following data has been extracted from alignments:
... ...
@@ -288,12 +297,12 @@ computed for region-level summary estimates.  Examples
288 297
 
289 298
 <<getMeth>>=
290 299
 getMeth(BS.chr22, regions, type = "raw")
291
-getMeth(BS.chr22, regions[2], type = "smooth", what = "perBase")
300
+getMeth(BS.chr22, regions[2], type = "raw", what = "perBase")
292 301
 @ 
293 302
 
294 303
 \section{Reading data}
295 304
 
296
-\subsubsection{Alignment output from the BSmooth alignment suite}
305
+\subsubsection*{Alignment output from the BSmooth alignment suite}
297 306
 
298 307
 It is straightforward to read output files (summarized evidence) from the BSmooth alignment suite,
299 308
 using \Rcode{read.bsmooth}.  This function takes as argument a number of directories, usually
... ...
@@ -301,12 +310,13 @@ corresponding to samples, with the alignment output.  Sometimes, one might want
301 310
 chromosomes, which can be accomplished by the \Rcode{seqnames} argument.  Also, the default values
302 311
 of \Rcode{qualityCutoff = 20} ensures that we do not use methylation evidence with a base quality
303 312
 strictly lower than 20 (since we may not be confident whether the read really supports methylation
304
-or not).
313
+or not).  The function \Rcode{read.bsmooth} allows for both gzipped and non-gzipped input files. It
314
+is faster to read gzipped files, so we recommend gzipping post-alignment.
305 315
 
306 316
 During the development of BSmooth we experimented with a number of different output formats.  These
307 317
 legacy formats can be read with \Rcode{read.umtab} and \Rcode{read.umtab2}.
308 318
 
309
-\subsubsection{Alignment output from other aligners}
319
+\subsubsection*{Alignment output from other aligners}
310 320
 
311 321
 We are interested in adding additional parsers to the package; if your favorite alignment software
312 322
 is not supported, feel free to get in touch.
... ...
@@ -339,7 +349,7 @@ file may be found by
339 349
 system.file("scripts", "get_BS.chr22.R", package = "bsseq")
340 350
 @ 
341 351
 
342
-\section{Analysis}
352
+\section*{Analysis}
343 353
 
344 354
 Computing smoothed methylation levels is simple.  We subset \Rcode{BS.chr22} so we only smooth 1
345 355
 sample, for run-time purpose
... ...
@@ -4,7 +4,11 @@
4 4
                   methylated regions}},
5 5
   journal =	 {Genome Biology},
6 6
   year =	 {2012},
7
-  note =	 {In press}
7
+  volume =	 {13},
8
+  number =	 {10},
9
+  pages =	 {R83},
10
+  doi =		 {10.1186/gb-2012-13-10-r83},
11
+  pubmed =	 {23034175}
8 12
 }
9 13
 
10 14
 
... ...
@@ -25,7 +25,7 @@ options(width=70)
25 25
 
26 26
 \title{Analyzing WGBS with the bsseq package}
27 27
 \author{Kasper Daniel Hansen\\ \texttt{khansen@jhsph.edu}}
28
-\date{Modified: June 26, 2011.  Compiled: \today}
28
+\date{Modified: October 13, 2012.  Compiled: \today}
29 29
 \begin{document}
30 30
 
31 31
 \maketitle
... ...
@@ -62,6 +62,11 @@ BS.cancer.ex
62 62
 pData(BS.cancer.ex)
63 63
 @ 
64 64
 
65
+If you use this package, please cite our BSmooth paper
66
+<<citation,echo=FALSE,results=tex>>=
67
+print(citation("bsseq"), style = "latex")
68
+@
69
+
65 70
 \section*{Smoothing}
66 71
 
67 72
 The first step of the analysis is to smooth the data