...
|
...
|
@@ -1,4 +1,5 @@
|
1
|
1
|
\name{read.bismark}
|
|
2
|
+% TODO: alias required?
|
2
|
3
|
\alias{read.bismark}
|
3
|
4
|
\title{
|
4
|
5
|
Parsing output from the Bismark alignment suite.
|
...
|
...
|
@@ -7,125 +8,244 @@
|
7
|
8
|
Parsing output from the Bismark alignment suite.
|
8
|
9
|
}
|
9
|
10
|
\usage{read.bismark(files,
|
10
|
|
- sampleNames,
|
|
11
|
+ sampleNames = basename(files),
|
|
12
|
+ loci = NULL,
|
11
|
13
|
rmZeroCov = FALSE,
|
12
|
14
|
strandCollapse = TRUE,
|
13
|
|
- fileType = c("cov", "oldBedGraph", "cytosineReport"),
|
14
|
|
- mc.cores = 1,
|
15
|
15
|
verbose = TRUE,
|
16
|
|
- BACKEND = NULL)
|
|
16
|
+ BPPARAM = bpparam(),
|
|
17
|
+ BACKEND = getRealizationBackend(),
|
|
18
|
+ ...,
|
|
19
|
+ fileType = c("cov", "oldBedGraph", "cytosineReport"),
|
|
20
|
+ mc.cores = 1)
|
17
|
21
|
}
|
18
|
22
|
\arguments{
|
19
|
|
- \item{files}{Input files. Each sample is in a different file. Input
|
20
|
|
- files are created by running Bismark's methylation extractor; see Note for
|
21
|
|
- details.}
|
22
|
|
- \item{sampleNames}{sample names, based on the order of \code{files}.}
|
23
|
|
- \item{rmZeroCov}{Should methylation loci that have zero coverage in
|
24
|
|
- all samples be removed. This will result in a much smaller object if
|
25
|
|
- the data originates from (targeted) capture bisulfite sequencing.}
|
26
|
|
- \item{strandCollapse}{Should strand-symmetric methylation loci, e.g., CpGs,
|
27
|
|
- be collapsed across strands. This option is only available if
|
28
|
|
- \code{fileType = "cytosineReport"} since the other file types do not
|
29
|
|
- contain the necessary strand information.}
|
30
|
|
- \item{fileType}{The format of the input file; see Note for details.}
|
31
|
|
- \item{mc.cores}{The number of cores used. Note that setting
|
32
|
|
- \code{mc.cores} to a value greater than 1 is not supported on MS
|
33
|
|
- Windows, see the help page for \code{mclapply}.}
|
34
|
|
- \item{verbose}{Make the function verbose.}
|
35
|
|
- \item{BACKEND}{The backend used for the 'M' and 'Cov' matrices. The default,
|
36
|
|
- \code{NULL}, corresponds to using \link[base]{matrix} objects. See
|
37
|
|
- \code{?DelayedArray::\link[DelayedArray]{setRealizationBackend}} for
|
38
|
|
- alternative backends.}
|
|
23
|
+ \item{files}{The path to the files created by running Bismark's methylation
|
|
24
|
+ extractor, one sample per file.
|
|
25
|
+ Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will be automatically uncompressed.
|
|
26
|
+ We strongly recommend you use the 'genome wide cytosine report' output files.
|
|
27
|
+ See section 'File formats' for further details.}
|
|
28
|
+ \item{sampleNames}{A character vector of sample names, based on the order of \code{files}.}
|
|
29
|
+ \item{loci}{\code{NULL} (default) or a \linkS4class{GenomicRanges} instance
|
|
30
|
+ containing methylation loci (all with width equal to 1).
|
|
31
|
+ If \code{loci = NULL}, then \code{read.bismark()} will perform a first pass over the Bismark file to identify candidate loci.
|
|
32
|
+ If \code{loci} is a \linkS4class{GenomicRanges} instance, then these form the candidate loci.
|
|
33
|
+ In either case, the candidate loci will be filtered if
|
|
34
|
+ \code{rmZeroCov = TRUE} and collapsed if \code{strandCollapse = TRUE} to form the final set of methylation loci that form the \code{\link[SummarizedExperiment]{rowRanges}} of the returned \linkS4class{BSseq} object.
|
|
35
|
+ See section 'Efficient use of \code{read.bismark()}' for further details.}
|
|
36
|
+ \item{rmZeroCov}{A \code{logical(1)} indicating whether methylation loci that
|
|
37
|
+ have zero coverage in all samples be removed.
|
|
38
|
+ For several reasons, the default \code{rmZeroCov = FALSE} is recommended even in cases where you ultimately want to remove such loci.
|
|
39
|
+ See section 'Efficient use of \code{read.bismark()}' for further details.}
|
|
40
|
+ \item{strandCollapse}{A \code{logical(1)} indicating whether strand-symmetric
|
|
41
|
+ methylation loci (i.e. CpGs) should be collapsed across strands.
|
|
42
|
+ This is only applicable for stranded methylation loci, e.g., loci extracted
|
|
43
|
+ from 'genome wide cytosine reports' (see section 'File formats' for further details).}
|
|
44
|
+ \item{verbose}{A \code{logical(1)} indicating whether progress messages
|
|
45
|
+ should be printed (default \code{TRUE}).}
|
|
46
|
+\item{BPPARAM}{An optional \linkS4class{BiocParallelParam} instance
|
|
47
|
+ determining the parallel back-end to be used during evaluation.
|
|
48
|
+ Currently supported are \linkS4class{SerialParam} (Unix, Mac, Windows),
|
|
49
|
+ \linkS4class{MulticoreParam} (Unix and Mac), \linkS4class{SnowParam}
|
|
50
|
+ (Unix, Mac, and Windows, limited to single-machine clusters), and
|
|
51
|
+ \linkS4class{BatchJobsParam} (Unix, Mac, Windows, only with the in-memory
|
|
52
|
+ realization backend).
|
|
53
|
+ See sections 'Parallelization and progress monitoring' and 'Realization backends' for further details.}
|
|
54
|
+ \item{BACKEND}{\code{NULL} or a single string specifying the name of the
|
|
55
|
+ realization backend.
|
|
56
|
+ When the backend is set to \code{NULL}, the \code{M} and \code{Cov} assays
|
|
57
|
+ are realized in memory as ordinary matrices, otherwise these are realized with the given \code{BACKEND}.
|
|
58
|
+ See section 'Realization backends' for further details.}
|
|
59
|
+ \item{...}{Additional arguments passed to the \linkS4class{RealizationSink}
|
|
60
|
+ constructor.
|
|
61
|
+ See section 'Realization backends' for further details.}
|
|
62
|
+ \item{fileType}{\strong{Deprecated}.
|
|
63
|
+ The format of each file is now automatically detected using the internal function \code{bsseq:::.guessBismarkFileType()}.}
|
|
64
|
+ \item{mc.cores}{\strong{Deprecated}.
|
|
65
|
+ See section 'Parallelization and progress monitoring' for further details.}
|
39
|
66
|
}
|
40
|
|
-\note{
|
41
|
|
- Input files can either be gzipped or not.
|
42
|
|
-
|
43
|
|
- The user must specify the relevant file format via the \code{fileType}
|
44
|
|
- argument. The format of the output of the Bismark alignment suite will depend
|
45
|
|
- on the version of Bismark and on various user-specified options. Please
|
46
|
|
- consult the Bismark documentation and the Bismark RELEASE NOTES
|
47
|
|
- (\url{http://www.bioinformatics.bbsrc.ac.uk/projects/bismark/RELEASE_NOTES.txt})
|
48
|
|
- for the definitive list of changes between versions. When possible, it is
|
49
|
|
- strongly recommended that you use the most recent version of Bismark.
|
50
|
|
-
|
51
|
|
- The "\code{cov}" and "\code{oldBedGraph}" formats both have six columns
|
52
|
|
- ("\code{chromosome}", "\code{position}", "\code{strand}",
|
53
|
|
- "\code{methylation percentage}", "\code{count methylated}",
|
54
|
|
- "\code{count unmethylated}"). If you are using a recent version of Bismark
|
55
|
|
- (\code{v>=0.10.0}) then the standard file extension for this file is
|
56
|
|
- "\code{.cov}". If, however, you are using an older version of Bismark
|
57
|
|
- (\code{v<0.10.0}) then the file extension will be "\code{.bedGraph}". Please
|
58
|
|
- note that the "\code{.bedGraph}" file created in recent versions of Bismark
|
59
|
|
- (\code{v>=0.10.0}) is \strong{not} suitable for analysis with bsseq because
|
60
|
|
- it only contains the "\code{methylation percentage}" and not
|
61
|
|
- "\code{count methylated}" nor "\code{count unmethylated}".
|
62
|
|
-
|
63
|
|
- The "\code{cytosineReport}" format has seven columns
|
64
|
|
- ("\code{chromosome}", "\code{position}", "\code{strand}",
|
65
|
|
- "\code{count methylated}", "\code{count unmethylated}", "\code{C-context}",
|
66
|
|
- "\code{trinucleotide context}").
|
67
|
|
- There is no standard file extension for this file. The "\code{C-context}" and
|
68
|
|
- "\code{trinucleotide context}" columns are not currently used by bsseq.
|
69
|
|
-
|
70
|
|
- The following is a list of some issues to be aware of when using
|
71
|
|
- output from Bismark's methylation extractor:
|
|
67
|
+
|
|
68
|
+\section{Efficient use of \code{read.bismark()}}{
|
|
69
|
+ We recommend the following to achieve fast and efficient importing of Bismark files:
|
72
|
70
|
|
73
|
71
|
\itemize{
|
74
|
|
- \item The program to extract methylation counts was named
|
75
|
|
- \code{methylation_extractor} in older versions of Bismark (\code{v<0.8.0})
|
76
|
|
- and re-named \code{bismark_methylation_extractor} in recent versions of
|
77
|
|
- Bismark (\code{v>=0.8.0}). Furthermore, very old versions of Bismark
|
78
|
|
- (\code{v<0.7.7}) required that user run a separate script (called
|
79
|
|
- something like \code{genome_methylation_bismark2bedGraph}) to create the
|
80
|
|
- six-column "\code{cov}"/"\code{oldBedGraph}" file.
|
81
|
|
- \item The \code{--counts} and \code{--bedGraph} arguments must be supplied
|
82
|
|
- to \code{methylation_extractor}/\code{bismark_methylation_extractor} in
|
83
|
|
- order to use the output with \code{bsseq::read.bismark()}.
|
84
|
|
- \item The genomic co-ordinates of the Bismark output file may be zero-based
|
85
|
|
- or one-based depending on whether the \code{--zero_based} argument is used.
|
86
|
|
- Furthermore, the default co-ordinate system varies by version of Bismark.
|
87
|
|
- bsseq makes no assumptions about the basis of the genomic co-ordinates and
|
88
|
|
- it is left to the user to ensure that the appropriate basis is used in the
|
89
|
|
- analysis of their data. Since Bioconductor packages and
|
90
|
|
- \code{\linkS4class{GRanges}} use one-based co-ordinates, it
|
91
|
|
- is recommended that your Bismark files are also one-based.
|
|
72
|
+ \item Specify the set of methylation loci via the \code{loci} argument.
|
|
73
|
+ \item Leave \code{rmZeroCov = FALSE}.
|
|
74
|
+ \item Use a \code{BPPARAM} with a large number of workers (cores).
|
|
75
|
+ \item Use \code{BACKEND = "HDF5Array"}.
|
|
76
|
+ }
|
|
77
|
+
|
|
78
|
+ Each point is discussed below.
|
|
79
|
+ \subsection{Specifying \code{loci}}{
|
|
80
|
+ Specifying the set of methylation loci via the \code{loci} argument means that \code{read.bismark()} does not need to first parse all \code{files} to identify the set of candidate loci.
|
|
81
|
+ Provided that \code{rmZeroCov = FALSE}, this means that each file is only read once.
|
|
82
|
+ This may be a considerable saving when there are a large number of \code{files}.
|
|
83
|
+
|
|
84
|
+ \strong{If you are unsure whether the below-described shortcuts apply to your data, leave \code{loci = NULL} and let \code{read.bismark()} identify the set of candidate loci from \code{files}.}
|
|
85
|
+
|
|
86
|
+ If all \code{files} are 'genome wide cytosine reports' for samples aligned to the same reference genome, then all \code{files} contain the exact same set of methylation loci.
|
|
87
|
+ In this case, you may wish to first construct \code{loci} using the internal function \code{bsseq:::.readBismarkAsFWGRanges()} applied to a single file, e.g., \code{loci = bsseq:::.readBismarkAsFWGRanges(files[1], rmZeroCov, strandCollapse)}.
|
|
88
|
+
|
|
89
|
+ % TODO: Uncomment once findCytosines is exported and documented.
|
|
90
|
+ % Alternatively, you may wish to use the \code{\link{findCytosines}()} function to construct all methylation loci of interest (e.g., all CpGs) in your reference genome and then pass this via the \code{loci} argument.
|
|
91
|
+ }
|
|
92
|
+
|
|
93
|
+ \subsection{Leaving \code{rmZeroCov = FALSE}}{
|
|
94
|
+ If you set \code{rmZeroCov = TRUE}, then \code{read.bismark()} must first parse all the \code{files} to identify which loci have zero coverage in all samples and then filter these out from the set of candidate loci.
|
|
95
|
+ \strong{This will happen regardless of whether \code{loci} is \code{NULL} or is supplied as a \linkS4class{GenomicRanges} of candidate loci.}
|
|
96
|
+
|
|
97
|
+ Furthermore, any coverage-based filtering of methylation loci is best left until you have constructed your final \linkS4class{BSseq} object.
|
|
98
|
+ In our experience, the final \linkS4class{BSseq} object is often the product of combining multiple \linkS4class{BSseq} objects, each constructed with a separate call to \code{read.bismark()}.
|
|
99
|
+ In such cases, it is premature to use \code{rmZeroCov = TRUE} when running each \code{read.bismark()} and combining these objects will regretably often lead to an inefficiently constructed final \linkS4class{BSseq} object.
|
92
|
100
|
}
|
|
101
|
+
|
|
102
|
+ \subsection{Using a \code{BPPARAM} with a large number of workers (cores)}{
|
|
103
|
+ Each file can be processed on its own, so you can process in parallel as many files as you have workers. We have good experience using as many as 40 cores via \code{BPPARAM = MulticoreParam(workers = 40)}.}
|
|
104
|
+
|
|
105
|
+ \subsection{Using \code{BACKEND = "HDF5Array"}}{
|
|
106
|
+ By using the HDF5Array realization backend from \pkg{HDF5Array}, we reduce the amount of data that is kept in-memory at any one time.
|
|
107
|
+ Once each file is parsed, the data are written to the HDF5 file and are no longer needed in-memory.
|
|
108
|
+ When combined with multiple workers (cores), this means that each file will only need to read and retain in-memory 1 sample's worth of data at a time.
|
|
109
|
+
|
|
110
|
+ Conversely, if you opt for all data to be in-memory (via \code{BACKEND = NULL}), then each worker will pass each file's data back to the main process and memory usage will steadily accumulate to often unreasonable levels.
|
|
111
|
+ }
|
93
|
112
|
}
|
|
113
|
+
|
|
114
|
+\section{File formats}{
|
|
115
|
+ The format of each file is automatically detected using the internal function \code{bsseq:::.guessBismarkFileType()}.
|
|
116
|
+ Files ending in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip} will be automatically uncompressed.
|
|
117
|
+ \subsection{Supported file formats}{
|
|
118
|
+ We strongly recommend that you use the 'genome wide cytosine report' output format
|
|
119
|
+ (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-genome-wide-cytosine-report-optional-is-tab-delimited-in-the-following-format-1-based-coords}) as this includes trand information for each locus.
|
|
120
|
+ The 'coverage' output format (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-coverage-output-looks-like-this-tab-delimited-1-based-genomic-coords}) is also supported, although all loci from these files will have their \code{\link[BiocGenerics]{strand}} set to \code{*} in the returned \linkS4class{BSseq} object because this file format does not include strand information.
|
|
121
|
+ }
|
|
122
|
+
|
|
123
|
+ \subsection{Unsupported file formats}{
|
|
124
|
+ Neither the 'bedGraph' output format (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-bedgraph-output-optional-looks-like-this-tab-delimited-0-based-start-1-based-end-coords}) nor the 'bismark_methylation_extractor' output format (\url{https://github.com/FelixKrueger/Bismark/tree/master/Docs#the-bismark_methylation_extractor-output-is-in-the-form-tab-delimited-1-based-coords}) are supported.
|
|
125
|
+ The former does not include the required counts of methylated and unmethylated reads hile the is an intermediate file containing read-level, rather than locus-level, data on methylation.
|
|
126
|
+ }
|
|
127
|
+ \subsection{One-based vs. zero-based genomic co-ordinates}{
|
|
128
|
+ The genomic co-ordinates of the Bismark output files may be zero-based or one-based depending on whether the \code{--zero_based} argument was used when running Bismark's methylation extractor.
|
|
129
|
+ Furthermore, the default co-ordinate counting system varies by version of Bismark.
|
|
130
|
+ \pkg{bsseq} makes no assumptions about the basis of the genomic co-ordinates and it is left to the user to ensure that the appropriate basis is used in the analysis of their data.
|
|
131
|
+
|
|
132
|
+ Since Bioconductor packages typically use one-based co-ordinates, we strongly recommend that your Bismark files are also one-based.
|
|
133
|
+ }
|
|
134
|
+}
|
|
135
|
+
|
|
136
|
+\section{Realization backends}{
|
|
137
|
+ The \code{read.bismark()} function creates a \linkS4class{BSseq} object with two assays, \code{M} and \code{Cov}.
|
|
138
|
+ The choice of \emph{realization backend} controls whether these assays are stored in-memory as an ordinary \link[base]{matrix} or on-disk as a \linkS4class{HDF5Array}, for example.
|
|
139
|
+ The choice of realization backend is controlled by the \code{BACKEND} argument, which defaults to the current value of \code{DelayedArray::\link[DelayedArray]{getRealizationBackend()}}.
|
|
140
|
+
|
|
141
|
+ \code{read.bismark()} supports the following realization backends:
|
|
142
|
+
|
|
143
|
+ \itemize{
|
|
144
|
+ \item \code{NULL} (in-memory): This stores each new assay in-memory using an ordinary \link[base]{matrix}.
|
|
145
|
+ \item \code{HDF5Array} (on-disk): This stores each new assay on-disk in a HDF5 file using an \linkS4class{HDF5Matrix} from \pkg{HDF5Array}.
|
|
146
|
+ }
|
|
147
|
+
|
|
148
|
+ Please note that certain combinations of realization backend and parallelization backend are currently not supported.
|
|
149
|
+ For example, the \linkS4class{HDF5Array} realization backend is currently only compatible when used with a single-machine parallelization backend (i.e. it is not compatible
|
|
150
|
+ with a \linkS4class{SnowParam} that specifies an \emph{ad hoc} cluster of
|
|
151
|
+ \strong{multiple} machines).
|
|
152
|
+ % TODO: Test this
|
|
153
|
+ \code{BSmooth()} will issue an error when given such incompatible realization and parallelization backends.
|
|
154
|
+
|
|
155
|
+ Additional arguments related to the realization backend can be passed via the \code{...} argument.
|
|
156
|
+ These arguments must be named and are passed to the relevant \linkS4class{RealizationSink} constructor.
|
|
157
|
+ For example, the \code{...} argument can be used to specify the path to the HDF5 file to be
|
|
158
|
+ used by \code{BSmooth()}.
|
|
159
|
+ Please see the examples at the bottom of the page.
|
|
160
|
+}
|
|
161
|
+
|
|
162
|
+\section{Parallelization and progress monitoring}{
|
|
163
|
+ \code{read.bismark()} now uses the \pkg{BiocParallel} package to implement
|
|
164
|
+ parallelization. This brings some notable improvements:
|
|
165
|
+
|
|
166
|
+ \itemize{
|
|
167
|
+ \item Imported files can now be written directly to an on-disk
|
|
168
|
+ realization backend by the worker. This dramatically reduces memory
|
|
169
|
+ usage compared to previous versions of \pkg{bsseq} that required all
|
|
170
|
+ results be retained in-memory.
|
|
171
|
+ \item Parallelization is now supported on Windows through the use of a
|
|
172
|
+ \linkS4class{SnowParam} object as the value of \code{BPPARAM}.
|
|
173
|
+ % TODO: Uncomment when/if implemented.
|
|
174
|
+ %\item Improved error handling makes it possible to gracefully resume
|
|
175
|
+ %\code{read.bismark()} jobs that encountered errors by re-doing only the necessary
|
|
176
|
+ %tasks.
|
|
177
|
+ \item Detailed and extensive job logging facilities.
|
|
178
|
+ }
|
|
179
|
+
|
|
180
|
+ All parallelization options are controlled via the \code{BPPARAM} argument.
|
|
181
|
+ In general, we recommend that users combine multicore (single-machine)
|
|
182
|
+ parallelization with an on-disk realization backend (see section,
|
|
183
|
+ 'Realization backend'). For Unix and Mac users, this means using
|
|
184
|
+ a \linkS4class{MulticoreParam}. For Windows users, this means using a
|
|
185
|
+ single-machine \linkS4class{SnowParam}. Please consult the \pkg{BiocParallel}
|
|
186
|
+ documentation to take full advantage of the more advanced features.
|
|
187
|
+
|
|
188
|
+ \subsection{Deprecated arguments}{
|
|
189
|
+ \code{mc.cores} is deprecated and will be removed in subsequent releases of \pkg{bsseq}.
|
|
190
|
+ This argument was necessary when \code{read.bismark()} used the \pkg{parallel} package to implement parallelization, but this functionality is superseded by the aforementioned use of \pkg{BiocParallel}.
|
|
191
|
+ We recommend that users who previously relied on these arguments switch to \code{BPPARAM = MulticoreParam(workers = mc.cores, progressbar = TRUE)}.
|
|
192
|
+ }
|
|
193
|
+
|
|
194
|
+ \subsection{Progress monitoring}{
|
|
195
|
+ A useful feature of \pkg{BiocParallel} are progress bars to monitor the
|
|
196
|
+ status of long-running jobs, such as \code{BSmooth()}. Progress bars are
|
|
197
|
+ controlled via the \code{progressbar} argument in the
|
|
198
|
+ \linkS4class{BiocParallelParam} constructor. Progress bars replace the
|
|
199
|
+ use of the deprecated \code{verbose} argument to print out information on
|
|
200
|
+ the status of \code{BSmooth()}.
|
|
201
|
+
|
|
202
|
+ \pkg{BiocParallel} also supports extensive and detailed logging facilities.
|
|
203
|
+ Please consult the \pkg{BiocParallel} documentation to take full advantage
|
|
204
|
+ these advanced features.
|
|
205
|
+ }
|
|
206
|
+}
|
|
207
|
+
|
94
|
208
|
\value{
|
95
|
|
- An object of class \code{\link{BSseq}}.
|
|
209
|
+ A \linkS4class{BSseq} object.
|
96
|
210
|
}
|
97
|
211
|
|
98
|
212
|
\seealso{
|
99
|
|
- \code{\link{read.bsmooth}} for parsing output from the BSmooth
|
100
|
|
- alignment suite. \code{\link{read.umtab}} for parsing legacy (old)
|
101
|
|
- formats from the BSmooth alignment suite.
|
102
|
|
- \code{\link{collapseBSseq}} for collapse (merging or summing) the
|
103
|
|
- data in two different directories.
|
|
213
|
+ \code{\link{read.bsmooth}()} for parsing output from the BSmooth
|
|
214
|
+ alignment suite.
|
|
215
|
+ \code{\link{read.umtab}()} for parsing legacy (old) formats from the BSmooth alignment suite.
|
|
216
|
+ \code{\link{collapseBSseq}()} for collapsing (aggregating) data from sample's with multiple Bismark methylation extractor files (e.g., technical replicates).
|
104
|
217
|
}
|
105
|
218
|
|
106
|
219
|
\examples{
|
|
220
|
+ # Run read.bismark() on a single sample to construct a matrix-backed BSseq
|
|
221
|
+ # object.
|
107
|
222
|
infile <- system.file("extdata/test_data.fastq_bismark.bismark.cov.gz",
|
108
|
223
|
package = 'bsseq')
|
109
|
|
- bismarkBSseq <- read.bismark(files = infile,
|
110
|
|
- sampleNames = "test_data",
|
111
|
|
- rmZeroCov = FALSE,
|
112
|
|
- strandCollapse = FALSE,
|
113
|
|
- fileType = "cov",
|
114
|
|
- verbose = TRUE)
|
115
|
|
- bismarkBSseq
|
116
|
|
-
|
117
|
|
- #-----------------------------------------------------------------------------
|
118
|
|
- # An example constructing a HDF5Array-backed BSseq object
|
119
|
|
- #
|
120
|
|
- library(HDF5Array)
|
121
|
|
- # See ?DelayedArray::setRealizationBackend for details
|
122
|
|
- hdf5_bismarkBSseq <- read.bismark(files = infile,
|
123
|
|
- sampleNames = "test_data",
|
124
|
|
- rmZeroCov = FALSE,
|
125
|
|
- strandCollapse = FALSE,
|
126
|
|
- fileType = "cov",
|
127
|
|
- verbose = TRUE,
|
128
|
|
- BACKEND = "HDF5Array")
|
|
224
|
+ bsseq <- read.bismark(files = infile,
|
|
225
|
+ sampleNames = "test_data",
|
|
226
|
+ rmZeroCov = FALSE,
|
|
227
|
+ strandCollapse = FALSE,
|
|
228
|
+ verbose = TRUE)
|
|
229
|
+ # This is a matrix-backed BSseq object.
|
|
230
|
+ sapply(assays(bsseq, withDimnames = FALSE), class)
|
|
231
|
+ bsseq
|
|
232
|
+
|
|
233
|
+ \dontrun{
|
|
234
|
+ # Run read.bismark() on a single sample to construct a HDF5Array-backed BSseq
|
|
235
|
+ # object (with data written to the file 'read.bismark_example.h5')
|
|
236
|
+ bsseq <- read.bismark(files = infile,
|
|
237
|
+ sampleNames = "test_data",
|
|
238
|
+ rmZeroCov = FALSE,
|
|
239
|
+ strandCollapse = FALSE,
|
|
240
|
+ verbose = TRUE,
|
|
241
|
+ BACKEND = "HDF5Array",
|
|
242
|
+ filepath = "read.bismark_example.h5")
|
|
243
|
+ # This is a HDF5Array-backed BSseq object.
|
|
244
|
+ sapply(assays(bsseq, withDimnames = FALSE), class)
|
|
245
|
+ # The 'M' and 'Cov' assays are in the HDF5 file 'read.bismark_example.h5' (in
|
|
246
|
+ # the current working directory).
|
|
247
|
+ sapply(assays(bsseq, withDimnames = FALSE), path)
|
|
248
|
+ }
|
129
|
249
|
}
|
130
|
250
|
|
131
|
251
|
\author{
|