Browse code

Update read.bismark() documentation

Peter Hickey authored on 12/06/2018 19:58:27
Showing 2 changed files

... ...
@@ -2,14 +2,16 @@
2 2
 
3 3
 # TODO: Test against some of Bismark's other output files. May need a stricter
4 4
 #       and more accurate heuristic to avoid 'passing' bad files.
5
-# NOTE: Not using readr::count_fields() because it is very slow on large,
6
-#       compressed files. This is because it reads the entire file into memory
7
-#       as a raw vector regardless of the value of `n_max` (see
8
-#       https://github.com/tidyverse/readr/issues/610). But good old
9
-#       utils::read.delim() doesn't have this limitation!
5
+# TODO: Test for 'bismark_methylation_extractor' and 'bedGraph' formats
6
+#       (https://github.com/FelixKrueger/Bismark/tree/master/Docs#output-1)
10 7
 .guessBismarkFileType <- function(files, n_max = 10L) {
11 8
     guessed_file_types <- setNames(vector("character", length(files)), files)
12 9
     for (file in files) {
10
+        # NOTE: Not using readr::count_fields() because it is very slow on
11
+        #       large, compressed files. This is because it reads the entire
12
+        #       file into memory as a raw vector regardless of the value of
13
+        #       `n_max` (see https://github.com/tidyverse/readr/issues/610).
14
+        #       But good old utils::read.delim() doesn't have this limitation!
13 15
         x <- read.delim(
14 16
             file = file,
15 17
             header = FALSE,
... ...
@@ -53,8 +55,9 @@
53 55
                              ...) {
54 56
     col_spec <- match.arg(col_spec)
55 57
     file_type <- .guessBismarkFileType(file)
58
+    # TODO: Test for 'bismark_methylation_extractor' and 'bedGraph' formats,
59
+    #       and error out (they're not supported).
56 60
     stopifnot(S4Vectors:::isTRUEorFALSE(check))
57
-    # TODO: Read in seqnames as a factor?
58 61
     if (file_type == "cov") {
59 62
         col_names <- c("seqnames", "start", "end", "beta", "M", "U")
60 63
         if (col_spec == "BSseq") {
... ...
@@ -353,9 +356,7 @@
353 356
 # TODO: Support BPREDO?
354 357
 # TODO: Support passing a colData so that metadata is automatically added to
355 358
 #       samples?
356
-# TODO: Properly pass down verbose as subverbose to functions that take verbose
357
-#       argument.
358
-# NOTE: `...` are used to pass filepath, chunkdim, level, etc. to
359
+# TODO: Document that `...` are used to pass filepath, chunkdim, level, etc. to
359 360
 #       HDF5RealizationSink().
360 361
 # TODO: (long term) Formalise `...` by something analogous to the
361 362
 #       BiocParallelParam class (RealizationSinkParam) i.e. something that
... ...
@@ -368,15 +369,15 @@
368 369
 #       is used in conjunction with files without strand information.
369 370
 read.bismark <- function(files,
370 371
                          sampleNames = basename(files),
372
+                         loci = NULL,
371 373
                          rmZeroCov = FALSE,
372 374
                          strandCollapse = TRUE,
373
-                         fileType = c("cov", "oldBedGraph", "cytosineReport"),
374
-                         loci = NULL,
375
-                         mc.cores = 1,
376 375
                          verbose = TRUE,
377 376
                          BPPARAM = bpparam(),
378 377
                          BACKEND = getRealizationBackend(),
379
-                         ...) {
378
+                         ...,
379
+                         fileType = c("cov", "oldBedGraph", "cytosineReport"),
380
+                         mc.cores = 1) {
380 381
     # Argument checks ----------------------------------------------------------
381 382
 
382 383
     # Check for deprecated arguments and issue warning(s) if found.
... ...
@@ -557,32 +558,20 @@ read.bismark <- function(files,
557 558
 
558 559
 # TODOs ------------------------------------------------------------------------
559 560
 
560
-# TODO: The documentation needs a complete overhaul and checked against Bismark
561
-#       docs (e.g., the description of the .cov file is wrong)
562 561
 # TODO: Consolidate use of message()/cat()/etc.
563 562
 # TODO: Add function like minfi::read.metharray.sheet()?
564 563
 # TODO: Should BACKEND really be an argument of read.bismark(); see related
565 564
 #       issue on minfi repo https://github.com/hansenlab/minfi/issues/140
566 565
 # TODO: May receive warning "In read_tokens_(data, tokenizer, col_specs, col_names,  ... : length of NULL cannot be changed". This is fixed in devel version of
567 566
 #       readr (https://github.com/tidyverse/readr/issues/833)
568
-# TODO: Decide whether to preserve verbose argument of several functions.
569
-# TODO: Document that if 'loci' is supplied then only loci in it will be
570
-#       retained.
571 567
 # TODO: Think about naming scheme for functions. Try to have the function that
572 568
 #       is bpapply()-ed have a similar name to its parent.
573 569
 # TODO: Document internal functions for my own sanity. Also, some may be useful
574 570
 #       to users of bsseq (although I still won't export these for the time
575 571
 #       being).
576
-# TODO: Allow user to specify HDF5 file and have both M and Cov written to that
577
-#       file.
578
-# TODO: Helper function to obtain CpX loci from BSgenome as GRanges to pass as
579
-#       'gr' argument in read.bismark().
580 572
 # TODO: (long term) Current implementation requires that user can load at least
581 573
 #       one sample's worth of data into memory per worker. Could instead read
582 574
 #       chunks of data, write to sink, load next chunk, etc.
583
-# TODO: Add big note to documentation that .cov file does not contain strand
584
-#       information, which means strandCollapse can't be used (unless 'gr' is
585
-#       supplied or one of the other files is a cytosineReport file).
586 575
 # TODO: Document that if 'gr' is NULL and any 'files' (especially the first
587 576
 #       file) are .cov files, then any loci present in the .cov files will have
588 577
 #       their strand set to *. If you are mixing and matching .cov and
... ...
@@ -1,4 +1,5 @@
1 1
 \name{read.bismark}
2
+% TODO: alias required?
2 3
 \alias{read.bismark}
3 4
 \title{
4 5
   Parsing output from the Bismark alignment suite.
... ...
@@ -7,125 +8,244 @@
7 8
   Parsing output from the Bismark alignment suite.
8 9
 }
9 10
 \usage{read.bismark(files,
10
-             sampleNames,
11
+             sampleNames = basename(files),
12
+             loci = NULL,
11 13
              rmZeroCov = FALSE,
12 14
              strandCollapse = TRUE,
13
-             fileType = c("cov", "oldBedGraph", "cytosineReport"),
14
-             mc.cores = 1,
15 15
              verbose = TRUE,
16
-             BACKEND = NULL)
16
+             BPPARAM = bpparam(),
17
+             BACKEND = getRealizationBackend(),
18
+             ...,
19
+             fileType = c("cov", "oldBedGraph", "cytosineReport"),
20
+             mc.cores = 1)
17 21
 }
18 22
 \arguments{
19
-  \item{files}{Input files. Each sample is in a different file. Input
20
-    files are created by running Bismark's methylation extractor; see Note for
21
-    details.}
22
-  \item{sampleNames}{sample names, based on the order of \code{files}.}
23
-  \item{rmZeroCov}{Should methylation loci that have zero coverage in
24
-    all samples be removed. This will result in a much smaller object if
25
-    the data originates from (targeted) capture bisulfite sequencing.}
26
-  \item{strandCollapse}{Should strand-symmetric methylation loci, e.g., CpGs,
27
-    be collapsed across strands. This option is only available if
28
-    \code{fileType = "cytosineReport"} since the other file types do not
29
-    contain the necessary strand information.}
30
-  \item{fileType}{The format of the input file; see Note for details.}
31
-  \item{mc.cores}{The number of cores used.  Note that setting
32
-    \code{mc.cores} to a value greater than 1 is not  supported on MS
33
-    Windows, see the help page for \code{mclapply}.}
34
-  \item{verbose}{Make the function verbose.}
35
-  \item{BACKEND}{The backend used for the 'M' and 'Cov' matrices. The default,
36
-  \code{NULL}, corresponds to using \link[base]{matrix} objects. See
37
-    \code{?DelayedArray::\link[DelayedArray]{setRealizationBackend}} for
38
-    alternative backends.}
23
+  \item{files}{The path to the files created by running Bismark's methylation
24
+    extractor, one sample per file.
25
+    Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will be automatically uncompressed.
26
+    We strongly recommend you use the 'genome wide cytosine report' output files.
27
+    See section 'File formats' for further details.}
28
+  \item{sampleNames}{A character vector of sample names, based on the order of \code{files}.}
29
+  \item{loci}{\code{NULL} (default) or a \linkS4class{GenomicRanges} instance
30
+    containing methylation loci (all with width equal to 1).
31
+    If \code{loci = NULL}, then \code{read.bismark()} will perform a first pass over the Bismark file to identify candidate loci.
32
+    If \code{loci} is a \linkS4class{GenomicRanges} instance, then these form the candidate loci.
33
+    In either case, the candidate loci will be filtered if
34
+    \code{rmZeroCov = TRUE} and collapsed if \code{strandCollapse = TRUE} to form the final set of methylation loci that form the \code{\link[SummarizedExperiment]{rowRanges}} of the returned \linkS4class{BSseq} object.
35
+    See section 'Efficient use of \code{read.bismark()}' for further details.}
36
+  \item{rmZeroCov}{A \code{logical(1)} indicating whether methylation loci that
37
+    have zero coverage in all samples be removed.
38
+    For several reasons, the default \code{rmZeroCov = FALSE} is recommended even in cases where you ultimately want to remove such loci.
39
+    See section 'Efficient use of \code{read.bismark()}' for further details.}
40
+  \item{strandCollapse}{A \code{logical(1)} indicating whether strand-symmetric
41
+    methylation loci (i.e. CpGs) should be collapsed across strands.
42
+    This is only applicable for stranded methylation loci, e.g., loci extracted
43
+    from 'genome wide cytosine reports' (see section 'File formats' for further details).}
44
+  \item{verbose}{A \code{logical(1)} indicating whether progress messages
45
+    should be printed (default \code{TRUE}).}
46
+\item{BPPARAM}{An optional \linkS4class{BiocParallelParam} instance
47
+    determining the parallel back-end to be used during evaluation.
48
+    Currently supported are \linkS4class{SerialParam} (Unix, Mac, Windows),
49
+    \linkS4class{MulticoreParam} (Unix and Mac), \linkS4class{SnowParam}
50
+    (Unix, Mac, and Windows, limited to single-machine clusters), and
51
+    \linkS4class{BatchJobsParam} (Unix, Mac, Windows, only with the in-memory
52
+    realization backend).
53
+    See sections 'Parallelization and progress  monitoring' and 'Realization backends' for further details.}
54
+  \item{BACKEND}{\code{NULL} or a single string specifying the name of the
55
+    realization backend.
56
+    When the backend is set to \code{NULL}, the \code{M} and \code{Cov} assays
57
+    are realized in memory as ordinary matrices, otherwise these are realized with the given \code{BACKEND}.
58
+    See section 'Realization backends' for further details.}
59
+  \item{...}{Additional arguments passed to the \linkS4class{RealizationSink}
60
+    constructor.
61
+    See section 'Realization backends' for further details.}
62
+  \item{fileType}{\strong{Deprecated}.
63
+    The format of each file is now automatically detected using the internal function \code{bsseq:::.guessBismarkFileType()}.}
64
+  \item{mc.cores}{\strong{Deprecated}.
65
+    See section 'Parallelization and progress monitoring' for further details.}
39 66
 }
40
-\note{
41
-  Input files can either be gzipped or not.
42
-
43
-  The user must specify the relevant file format via the \code{fileType}
44
-  argument. The format of the output of the Bismark alignment suite will depend
45
-  on the version of Bismark and on various user-specified options. Please
46
-  consult the Bismark documentation and the Bismark RELEASE NOTES
47
-  (\url{http://www.bioinformatics.bbsrc.ac.uk/projects/bismark/RELEASE_NOTES.txt})
48
-  for the definitive list of changes between versions. When possible, it is
49
-  strongly recommended that you use the most recent version of Bismark.
50
-
51
-  The "\code{cov}" and "\code{oldBedGraph}" formats both have six columns
52
-  ("\code{chromosome}", "\code{position}", "\code{strand}",
53
-  "\code{methylation percentage}", "\code{count methylated}",
54
-  "\code{count unmethylated}"). If you are using a recent version of Bismark
55
-  (\code{v>=0.10.0}) then the standard file extension for this file is
56
-  "\code{.cov}". If, however, you are using an older version of Bismark
57
-  (\code{v<0.10.0}) then the file extension will be "\code{.bedGraph}". Please
58
-  note that the "\code{.bedGraph}" file created in recent versions of Bismark
59
-  (\code{v>=0.10.0}) is \strong{not} suitable for analysis with bsseq because
60
-  it only contains the "\code{methylation percentage}" and not
61
-  "\code{count methylated}" nor "\code{count unmethylated}".
62
-
63
-  The "\code{cytosineReport}" format has seven columns
64
-  ("\code{chromosome}", "\code{position}", "\code{strand}",
65
-  "\code{count methylated}", "\code{count unmethylated}", "\code{C-context}",
66
-  "\code{trinucleotide context}").
67
-  There is no standard file extension for this file. The "\code{C-context}" and
68
-  "\code{trinucleotide context}" columns are not currently used by bsseq.
69
-
70
-  The following is a list of some issues to be aware of when using
71
-  output from Bismark's methylation extractor:
67
+
68
+\section{Efficient use of \code{read.bismark()}}{
69
+  We recommend the following to achieve fast and efficient importing of Bismark files:
72 70
 
73 71
   \itemize{
74
-    \item The program to extract methylation counts was named
75
-    \code{methylation_extractor} in older versions of Bismark (\code{v<0.8.0})
76
-    and re-named \code{bismark_methylation_extractor} in recent versions of
77
-    Bismark (\code{v>=0.8.0}). Furthermore, very old versions of Bismark
78
-    (\code{v<0.7.7}) required that user run a separate script (called
79
-    something like \code{genome_methylation_bismark2bedGraph}) to create the
80
-    six-column "\code{cov}"/"\code{oldBedGraph}" file.
81
-    \item The \code{--counts} and \code{--bedGraph} arguments must be supplied
82
-    to \code{methylation_extractor}/\code{bismark_methylation_extractor} in
83
-    order to use the output with \code{bsseq::read.bismark()}.
84
-    \item The genomic co-ordinates of the Bismark output file may be zero-based
85
-    or one-based depending on whether the \code{--zero_based} argument is used.
86
-    Furthermore, the default co-ordinate system varies by version of Bismark.
87
-    bsseq makes no assumptions about the basis of the genomic co-ordinates and
88
-    it is left to the user to ensure that the appropriate basis is used in the
89
-    analysis of their data. Since Bioconductor packages and
90
-    \code{\linkS4class{GRanges}} use one-based co-ordinates, it
91
-    is recommended that your Bismark files are also one-based.
72
+    \item Specify the set of methylation loci via the \code{loci} argument.
73
+    \item Leave \code{rmZeroCov = FALSE}.
74
+    \item Use a \code{BPPARAM} with a large number of workers (cores).
75
+    \item Use \code{BACKEND = "HDF5Array"}.
76
+  }
77
+
78
+  Each point is discussed below.
79
+  \subsection{Specifying \code{loci}}{
80
+    Specifying the set of methylation loci via the \code{loci} argument means that \code{read.bismark()} does not need to first parse all \code{files} to identify the set of candidate loci.
81
+    Provided that \code{rmZeroCov = FALSE}, this means that each file is only read once.
82
+    This may be a considerable saving when there are a large number of \code{files}.
83
+
84
+    \strong{If you are unsure whether the below-described shortcuts apply to your data, leave \code{loci = NULL} and let \code{read.bismark()} identify the set of candidate loci from \code{files}.}
85
+
86
+    If all \code{files} are 'genome wide cytosine reports' for samples aligned to the same reference genome, then all \code{files} contain the exact same set of methylation loci.
87
+    In this case, you may wish to first construct \code{loci} using the internal function \code{bsseq:::.readBismarkAsFWGRanges()} applied to a single file, e.g., \code{loci = bsseq:::.readBismarkAsFWGRanges(files[1], rmZeroCov, strandCollapse)}.
88
+
89
+    % TODO: Uncomment once findCytosines is exported and documented.
90
+    % Alternatively, you may wish to use the \code{\link{findCytosines}()} function to construct all methylation loci of interest (e.g., all CpGs) in your reference genome and then pass this via the \code{loci} argument.
91
+  }
92
+
93
+  \subsection{Leaving \code{rmZeroCov = FALSE}}{
94
+    If you set \code{rmZeroCov = TRUE}, then \code{read.bismark()} must first parse all the \code{files} to identify which loci have zero coverage in all samples and then filter these out from the set of candidate loci.
95
+    \strong{This will happen regardless of whether \code{loci} is \code{NULL} or is supplied as a \linkS4class{GenomicRanges} of candidate loci.}
96
+
97
+    Furthermore, any coverage-based filtering of methylation loci is best left until you have constructed your final \linkS4class{BSseq} object.
98
+    In our experience, the final \linkS4class{BSseq} object is often the product of combining multiple \linkS4class{BSseq} objects, each constructed with a separate call to \code{read.bismark()}.
99
+    In such cases, it is premature to use \code{rmZeroCov = TRUE} when running each \code{read.bismark()} and combining these objects will regretably often lead to an inefficiently constructed final \linkS4class{BSseq} object.
92 100
   }
101
+
102
+  \subsection{Using a \code{BPPARAM} with a large number of workers (cores)}{
103
+    Each file can be processed on its own, so you can process in parallel as many files as you have workers. We have good experience using as many as 40 cores via \code{BPPARAM = MulticoreParam(workers = 40)}.}
104
+
105
+  \subsection{Using \code{BACKEND = "HDF5Array"}}{
106
+    By using the HDF5Array realization backend from \pkg{HDF5Array}, we reduce the amount of data that is kept in-memory at any one time.
107
+    Once each file is parsed, the data are written to the HDF5 file and are no longer needed in-memory.
108
+    When combined with multiple workers (cores), this means that each file will only need to read and retain in-memory 1 sample's worth of data at a time.
109
+
110
+    Conversely, if you opt for all data to be in-memory (via \code{BACKEND = NULL}), then each worker will pass each file's data back to the main process and memory usage will steadily accumulate to often unreasonable levels.
111
+    }
93 112
 }
113
+
114
+\section{File formats}{
115
+  The format of each file is automatically detected using the internal function \code{bsseq:::.guessBismarkFileType()}.
116
+  Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will be automatically uncompressed.
117
+  \subsection{Supported file formats}{
118
+     We strongly recommend that you use the 'genome wide cytosine report' output format
119
+     (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-genome-wide-cytosine-report-optional-is-tab-delimited-in-the-following-format-1-based-coords}) as this includes trand information for each locus.
120
+     The 'coverage' output format (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-coverage-output-looks-like-this-tab-delimited-1-based-genomic-coords}) is also supported, although all loci from these files will have their \code{\link[BiocGenerics]{strand}} set to \code{*} in the returned \linkS4class{BSseq} object because this file format does not include strand information.
121
+  }
122
+
123
+  \subsection{Unsupported file formats}{
124
+     Neither the 'bedGraph' output format (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-bedgraph-output-optional-looks-like-this-tab-delimited-0-based-start-1-based-end-coords}) nor the 'bismark_methylation_extractor' output format (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-bismark_methylation_extractor-output-is-in-the-form-tab-delimited-1-based-coords}) are supported.
125
+     The former does not include the required counts of methylated and unmethylated reads hile the is an intermediate file containing read-level, rather than locus-level, data on methylation.
126
+  }
127
+  \subsection{One-based vs. zero-based genomic co-ordinates}{
128
+    The genomic co-ordinates of the Bismark output files may be zero-based or one-based depending on whether the \code{--zero_based} argument was used when running Bismark's methylation extractor.
129
+    Furthermore, the default co-ordinate counting system varies by version of Bismark.
130
+    \pkg{bsseq} makes no assumptions about the basis of the genomic co-ordinates and it is left to the user to ensure that the appropriate basis is used in the analysis of their data.
131
+
132
+    Since Bioconductor packages typically use one-based co-ordinates, we strongly recommend that your Bismark files are also one-based.
133
+  }
134
+}
135
+
136
+\section{Realization backends}{
137
+  The \code{read.bismark()} function creates a \linkS4class{BSseq} object with two assays, \code{M} and \code{Cov}.
138
+  The choice of \emph{realization backend} controls whether these assays are stored in-memory as an ordinary \link[base]{matrix} or on-disk as a \linkS4class{HDF5Array}, for example.
139
+  The choice of realization backend is controlled by the \code{BACKEND} argument, which defaults to the current value of \code{DelayedArray::\link[DelayedArray]{getRealizationBackend()}}.
140
+
141
+  \code{read.bismark()} supports the following realization backends:
142
+
143
+  \itemize{
144
+    \item \code{NULL} (in-memory): This stores each new assay in-memory using an ordinary \link[base]{matrix}.
145
+    \item \code{HDF5Array} (on-disk): This stores each new assay on-disk in a HDF5 file using an \linkS4class{HDF5Matrix} from \pkg{HDF5Array}.
146
+  }
147
+
148
+  Please note that certain combinations of realization backend and parallelization backend are currently not supported.
149
+  For example, the \linkS4class{HDF5Array} realization backend is currently only compatible when used with a single-machine parallelization backend (i.e. it is not compatible
150
+  with a \linkS4class{SnowParam} that specifies an \emph{ad hoc} cluster of
151
+  \strong{multiple} machines).
152
+  % TODO: Test this
153
+  \code{BSmooth()} will issue an error when given such incompatible realization and parallelization backends.
154
+
155
+  Additional arguments related to the realization backend can be passed via the \code{...} argument.
156
+  These arguments must be named and are passed to the relevant \linkS4class{RealizationSink} constructor.
157
+  For example, the \code{...} argument can be used to specify the path to the HDF5 file to be
158
+  used by \code{BSmooth()}.
159
+  Please see the examples at the bottom of the page.
160
+}
161
+
162
+\section{Parallelization and progress monitoring}{
163
+  \code{read.bismark()} now uses the \pkg{BiocParallel} package to implement
164
+  parallelization. This brings some notable improvements:
165
+
166
+  \itemize{
167
+    \item Imported files can now be written directly to an on-disk
168
+      realization backend by the worker. This dramatically reduces memory
169
+      usage compared to previous versions of \pkg{bsseq} that required all
170
+      results be retained in-memory.
171
+    \item Parallelization is now supported on Windows through the use of a
172
+    \linkS4class{SnowParam} object as the value of \code{BPPARAM}.
173
+    % TODO: Uncomment when/if implemented.
174
+    %\item Improved error handling makes it possible to gracefully resume
175
+    %\code{read.bismark()} jobs that encountered errors by re-doing only the necessary
176
+    %tasks.
177
+    \item Detailed and extensive job logging facilities.
178
+  }
179
+
180
+  All parallelization options are controlled via the \code{BPPARAM} argument.
181
+  In general, we recommend that users combine multicore (single-machine)
182
+  parallelization with an on-disk realization backend (see section,
183
+  'Realization backend'). For Unix and Mac users, this means using
184
+  a \linkS4class{MulticoreParam}. For Windows users, this means using a
185
+  single-machine \linkS4class{SnowParam}. Please consult the \pkg{BiocParallel}
186
+  documentation to take full advantage of the more advanced features.
187
+
188
+  \subsection{Deprecated arguments}{
189
+    \code{mc.cores} is deprecated and will be removed in subsequent releases of \pkg{bsseq}.
190
+    This argument was necessary when \code{read.bismark()} used the \pkg{parallel} package to implement parallelization, but this functionality is superseded by the aforementioned use of \pkg{BiocParallel}.
191
+    We recommend that users who previously relied on these arguments switch to \code{BPPARAM = MulticoreParam(workers = mc.cores, progressbar = TRUE)}.
192
+  }
193
+
194
+  \subsection{Progress monitoring}{
195
+    A useful feature of \pkg{BiocParallel} are progress bars to monitor the
196
+    status of long-running jobs, such as \code{BSmooth()}. Progress bars are
197
+    controlled via the \code{progressbar} argument in the
198
+    \linkS4class{BiocParallelParam} constructor. Progress bars replace the
199
+    use of the deprecated \code{verbose} argument to print out information on
200
+    the status of \code{BSmooth()}.
201
+
202
+    \pkg{BiocParallel} also supports extensive and detailed logging facilities.
203
+    Please consult the \pkg{BiocParallel} documentation to take full advantage
204
+    these advanced features.
205
+  }
206
+}
207
+
94 208
 \value{
95
-  An object of class \code{\link{BSseq}}.
209
+  A \linkS4class{BSseq} object.
96 210
 }
97 211
 
98 212
 \seealso{
99
-  \code{\link{read.bsmooth}} for parsing output from the BSmooth
100
-  alignment suite. \code{\link{read.umtab}} for parsing legacy (old)
101
-  formats from the  BSmooth alignment suite.
102
-  \code{\link{collapseBSseq}} for collapse (merging or summing) the
103
-  data in two different directories.
213
+  \code{\link{read.bsmooth}()} for parsing output from the BSmooth
214
+  alignment suite.
215
+  \code{\link{read.umtab}()} for parsing legacy (old) formats from the BSmooth alignment suite.
216
+  \code{\link{collapseBSseq}()} for collapsing (aggregating) data from sample's with multiple Bismark methylation extractor files (e.g., technical replicates).
104 217
 }
105 218
 
106 219
 \examples{
220
+  # Run read.bismark() on a single sample to construct a matrix-backed BSseq
221
+  # object.
107 222
   infile <- system.file("extdata/test_data.fastq_bismark.bismark.cov.gz",
108 223
                         package = 'bsseq')
109
-  bismarkBSseq <- read.bismark(files = infile,
110
-                               sampleNames = "test_data",
111
-                               rmZeroCov = FALSE,
112
-                               strandCollapse = FALSE,
113
-                               fileType = "cov",
114
-                               verbose = TRUE)
115
-  bismarkBSseq
116
-
117
-  #-----------------------------------------------------------------------------
118
-  # An example constructing a HDF5Array-backed BSseq object
119
-  #
120
-  library(HDF5Array)
121
-  # See ?DelayedArray::setRealizationBackend for details
122
-  hdf5_bismarkBSseq <- read.bismark(files = infile,
123
-                                    sampleNames = "test_data",
124
-                                    rmZeroCov = FALSE,
125
-                                    strandCollapse = FALSE,
126
-                                    fileType = "cov",
127
-                                    verbose = TRUE,
128
-                                    BACKEND = "HDF5Array")
224
+  bsseq <- read.bismark(files = infile,
225
+                        sampleNames = "test_data",
226
+                        rmZeroCov = FALSE,
227
+                        strandCollapse = FALSE,
228
+                        verbose = TRUE)
229
+  # This is a matrix-backed BSseq object.
230
+  sapply(assays(bsseq, withDimnames = FALSE), class)
231
+  bsseq
232
+
233
+  \dontrun{
234
+  # Run read.bismark() on a single sample to construct a HDF5Array-backed BSseq
235
+  # object (with data written to the file 'read.bismark_example.h5')
236
+  bsseq <- read.bismark(files = infile,
237
+                        sampleNames = "test_data",
238
+                        rmZeroCov = FALSE,
239
+                        strandCollapse = FALSE,
240
+                        verbose = TRUE,
241
+                        BACKEND = "HDF5Array",
242
+                        filepath = "read.bismark_example.h5")
243
+  # This is a HDF5Array-backed BSseq object.
244
+  sapply(assays(bsseq, withDimnames = FALSE), class)
245
+  # The 'M' and 'Cov' assays are in the HDF5 file 'read.bismark_example.h5' (in
246
+  # the current working directory).
247
+  sapply(assays(bsseq, withDimnames = FALSE), path)
248
+  }
129 249
 }
130 250
 
131 251
 \author{