Browse code

replace BiocInstaller biocLite mentions with BiocManager

LiNk-NY authored on 30/08/2018 17:49:42
Showing 1 changed files
... ...
@@ -125,8 +125,9 @@ example demonstrates:
125 125
 library(gmapR)
126 126
 
127 127
 if (!suppressWarnings(require(BSgenome.Dmelanogaster.UCSC.dm3))) {
128
-  source("http://bioconductor.org/biocLite.R")
129
-  biocLite("BSgenome.Dmelanogaster.UCSC.dm3")
128
+  if (!requireNamespace("BiocManager", quietly=TRUE))
129
+      install.packages("BiocManager")
130
+  BiocManager::install("BSgenome.Dmelanogaster.UCSC.dm3")
130 131
   library(BSgenome.Dmelanogaster.UCSC.dm3)
131 132
 }
132 133
 
... ...
@@ -348,8 +349,9 @@ object:
348 349
 
349 350
 <<create_hg19_GmapGenome, eval=FALSE>>=
350 351
 if (!suppressWarnings(require(BSgenome.Hsapiens.UCSC.hg19))) {
351
-  source("http://bioconductor.org/biocLite.R")
352
-  biocLite("BSgenome.Hsapiens.UCSC.hg19")
352
+  if (!requireNamespace("BiocManager", quietly=TRUE))
353
+      install.packages("BiocManager")
354
+  BiocManager::install("BSgenome.Hsapiens.UCSC.hg19")
353 355
   library(BSgenome.Hsapiens.UCSC.hg19)
354 356
 }
355 357
 gmapGenome <- GmapGenome(genome=Hsapiens,
Browse code

update to latest GSTRUCT

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@123972 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/11/2016 21:54:30
Showing 1 changed files
... ...
@@ -308,10 +308,11 @@ param <- BamTallyParam(genome,
308 308
                        primary_only = FALSE,
309 309
                        min_depth = 0L, variant_strand = 1L,
310 310
                        ignore_query_Ns = TRUE,
311
-                       indels = FALSE, include_soft_clips = 1L, xs=TRUE)
311
+                       indels = FALSE, include_soft_clips = 1L, xs=TRUE,
312
+                       min_base_quality = 23L)
312 313
 tallies <-bam_tally(bam_file,
313 314
                     param)
314
-variantSummary(tallies, high_base_quality = 23L)
315
+variantSummary(tallies)
315 316
 @ 
316 317
 
317 318
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Browse code

adjustments in response to the vals -> filter renaming in GenomicFeatures

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@115831 bc3139a8-67e5-0310-9ffc-ced21a209358

Herve Pages authored on 05/04/2016 22:55:27
Showing 1 changed files
... ...
@@ -165,7 +165,7 @@ library("org.Hs.eg.db")
165 165
 library("TxDb.Hsapiens.UCSC.hg19.knownGene")
166 166
 eg <- org.Hs.eg.db::org.Hs.egSYMBOL2EG[["TP53"]]
167 167
 txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
168
-tx <- transcripts(txdb, vals = list(gene_id = eg))
168
+tx <- transcripts(txdb, filter = list(gene_id = eg))
169 169
 roi <- range(tx) + 1e6
170 170
 strand(roi) <- "*"
171 171
 @ 
Browse code

resurrect improvements that we reverted before the April release, expect breakage for a while

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110039 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 28/10/2015 18:51:00
Showing 1 changed files
... ...
@@ -308,7 +308,7 @@ param <- BamTallyParam(genome,
308 308
                        primary_only = FALSE,
309 309
                        min_depth = 0L, variant_strand = 1L,
310 310
                        ignore_query_Ns = TRUE,
311
-                       indels = FALSE, include_soft_clips = 1L)
311
+                       indels = FALSE, include_soft_clips = 1L, xs=TRUE)
312 312
 tallies <-bam_tally(bam_file,
313 313
                     param)
314 314
 variantSummary(tallies, high_base_quality = 23L)
Browse code

Reverted to r101133, along with NAMESPACE fixes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@102487 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/04/2015 21:40:44
Showing 1 changed files
... ...
@@ -308,7 +308,7 @@ param <- BamTallyParam(genome,
308 308
                        primary_only = FALSE,
309 309
                        min_depth = 0L, variant_strand = 1L,
310 310
                        ignore_query_Ns = TRUE,
311
-                       indels = FALSE, include_soft_clips = 1L, xs=TRUE)
311
+                       indels = FALSE, include_soft_clips = 1L)
312 312
 tallies <-bam_tally(bam_file,
313 313
                     param)
314 314
 variantSummary(tallies, high_base_quality = 23L)
Browse code

Recent work towards supporting the new features... will revert back to a stable version immediately...

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@102485 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/04/2015 21:32:10
Showing 1 changed files
... ...
@@ -308,7 +308,7 @@ param <- BamTallyParam(genome,
308 308
                        primary_only = FALSE,
309 309
                        min_depth = 0L, variant_strand = 1L,
310 310
                        ignore_query_Ns = TRUE,
311
-                       indels = FALSE, include_soft_clips = 1L)
311
+                       indels = FALSE, include_soft_clips = 1L, xs=TRUE)
312 312
 tallies <-bam_tally(bam_file,
313 313
                     param)
314 314
 variantSummary(tallies, high_base_quality = 23L)
Browse code

add codon tally support. No vbump yet

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@93316 bc3139a8-67e5-0310-9ffc-ced21a209358

Gabriel Becker authored on 11/08/2014 23:42:48
Showing 1 changed files
... ...
@@ -308,7 +308,7 @@ param <- BamTallyParam(genome,
308 308
                        primary_only = FALSE,
309 309
                        min_depth = 0L, variant_strand = 1L,
310 310
                        ignore_query_Ns = TRUE,
311
-                       indels = FALSE, include_soft_clips = 1L, count_xs = TRUE)
311
+                       indels = FALSE, include_soft_clips = 1L)
312 312
 tallies <-bam_tally(bam_file,
313 313
                     param)
314 314
 variantSummary(tallies, high_base_quality = 23L)
Browse code

update to new bam_tally with support for XS counting, which we now support via BamTallyParam@count_xs.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@90225 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 13/05/2014 02:04:05
Showing 1 changed files
... ...
@@ -308,7 +308,7 @@ param <- BamTallyParam(genome,
308 308
                        primary_only = FALSE,
309 309
                        min_depth = 0L, variant_strand = 1L,
310 310
                        ignore_query_Ns = TRUE,
311
-                       indels = FALSE, include_soft_clips = 1L)
311
+                       indels = FALSE, include_soft_clips = 1L, count_xs = TRUE)
312 312
 tallies <-bam_tally(bam_file,
313 313
                     param)
314 314
 variantSummary(tallies, high_base_quality = 23L)
Browse code

update GSTRUCT (bam_tally); add include_soft_clip parameter for counting over soft clips of a given max length (more accurate allele frequency)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88343 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 02/04/2014 22:03:40
Showing 1 changed files
... ...
@@ -308,7 +308,7 @@ param <- BamTallyParam(genome,
308 308
                        primary_only = FALSE,
309 309
                        min_depth = 0L, variant_strand = 1L,
310 310
                        ignore_query_Ns = TRUE,
311
-                       indels = FALSE)
311
+                       indels = FALSE, include_soft_clips = 1L)
312 312
 tallies <-bam_tally(bam_file,
313 313
                     param)
314 314
 variantSummary(tallies, high_base_quality = 23L)
Browse code

TP53 genome is now named by the TxDb package from which the TP53 region was retrieved, so that it is automatically refreshed with annotation changes, and devel and release can coexist.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@87282 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 10/03/2014 23:10:51
Showing 1 changed files
... ...
@@ -182,9 +182,12 @@ library("gmapR")
182 182
 p53Seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19::Hsapiens, roi,
183 183
                  as.character = FALSE) 
184 184
 names(p53Seq) <- "TP53"
185
-gmapGenome <- GmapGenome(genome = p53Seq, name = "TP53_demo", create = TRUE, 
185
+gmapGenome <- GmapGenome(genome = p53Seq, 
186
+                         name = paste0("TP53_demo_", 
187
+                           packageVersion("TxDb.Hsapiens.UCSC.hg19.knownGene")), 
188
+                         create = TRUE, 
186 189
                          k = 12L)
187
-@ 
190
+@
188 191
 
189 192
 We add the known transcripts (splice sites) to the genome index:
190 193
 <<set_TP53_splicesites>>=
... ...
@@ -294,11 +297,8 @@ summarizing these data into R data structures. For now, there is one:
294 297
 \Rfunction{variantSummary}, which returns a \Rcode{VRanges} object
295 298
 describing putative genetic variants in the sample.
296 299
 <<run_bamtally, eval=FALSE>>=
297
-library(gmapR)
298
-
299 300
 bam_file <- system.file("extdata/H1993.analyzed.bam", 
300 301
                         package="LungCancerLines", mustWork=TRUE)
301
-
302 302
 breaks <- c(0L, 15L, 60L, 75L)
303 303
 bqual <- 56L
304 304
 mapq <- 13L
Browse code

clip_overlaps should be clip_overlap

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@86371 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 12/02/2014 17:13:58
Showing 1 changed files
... ...
@@ -215,7 +215,7 @@ gsnapParam <- GsnapParam(genome=gmapGenome,
215 215
                          splicing="knownGene",
216 216
                          indel_penalty=1L,
217 217
                          distant_splice_penalty=1L,
218
-                         clip_overlaps=TRUE)
218
+                         clip_overlap=TRUE)
219 219
 @ 
220 220
 
221 221
 Now we are ready to align.
Browse code

vignette improvements

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@85509 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/01/2014 19:15:01
Showing 1 changed files
... ...
@@ -137,9 +137,9 @@ gmapGenomeDirectory <- GmapGenomeDirectory(gmapGenomePath, create = TRUE)
137 137
 ##path: /reshpcfs/home/coryba/projects/gmapR2/testGenome 
138 138
 
139 139
 gmapGenome <- GmapGenome(genome=Dmelanogaster,
140
-                         directory = gmapGenomeDirectory,
141
-                         name = "dm3",
142
-                         create = TRUE,
140
+                         directory=gmapGenomeDirectory,
141
+                         name="dm3",
142
+                         create=TRUE,
143 143
                          k = 12L)
144 144
 ##> gmapGenome
145 145
 ##GmapGenome object
... ...
@@ -167,6 +167,7 @@ eg <- org.Hs.eg.db::org.Hs.egSYMBOL2EG[["TP53"]]
167 167
 txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
168 168
 tx <- transcripts(txdb, vals = list(gene_id = eg))
169 169
 roi <- range(tx) + 1e6
170
+strand(roi) <- "*"
170 171
 @ 
171 172
 
172 173
 Next we get the genetic sequence and use it to create a GmapGenome
... ...
@@ -181,7 +182,14 @@ library("gmapR")
181 182
 p53Seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19::Hsapiens, roi,
182 183
                  as.character = FALSE) 
183 184
 names(p53Seq) <- "TP53"
184
-gmapGenome <- GmapGenome(genome = p53Seq, name = "TP53_demo", create = TRUE, k = 12L)
185
+gmapGenome <- GmapGenome(genome = p53Seq, name = "TP53_demo", create = TRUE, 
186
+                         k = 12L)
187
+@ 
188
+
189
+We add the known transcripts (splice sites) to the genome index:
190
+<<set_TP53_splicesites>>=
191
+exons <- gmapR:::subsetRegion(exonsBy(txdb), roi, "TP53")
192
+spliceSites(gmapGenome, "knownGene") <- exons
185 193
 @ 
186 194
 
187 195
 The data package \software{LungCancerLines} contains fastqs of reads
... ...
@@ -199,15 +207,15 @@ behavior.
199 207
 
200 208
 <<create_gsnapParam>>=
201 209
 ##specify how GSNAP should behave using a GsnapParam object
202
-gsnapParam <- GsnapParam(genome = gmapGenome,
203
-                         unique_only = FALSE,
204
-                         max_mismatches = NULL,
205
-                         suboptimal_levels = 0, mode = "standard",
206
-                         npaths = 10,
207
-                         novelsplicing = FALSE, splicing = NULL, 
208
-                         nthreads = 1,
209
-                         batch = 2L,
210
-                         gunzip=TRUE)
210
+gsnapParam <- GsnapParam(genome=gmapGenome,
211
+                         unique_only=FALSE,
212
+                         suboptimal_levels=2L, 
213
+                         npaths=10L,
214
+                         novelsplicing=TRUE,
215
+                         splicing="knownGene",
216
+                         indel_penalty=1L,
217
+                         distant_splice_penalty=1L,
218
+                         clip_overlaps=TRUE)
211 219
 @ 
212 220
 
213 221
 Now we are ready to align.
... ...
@@ -216,6 +224,7 @@ Now we are ready to align.
216 224
 gsnapOutput <- gsnap(input_a=fastqs["H1993.first"],
217 225
                      input_b=fastqs["H1993.last"],
218 226
                      params=gsnapParam)
227
+
219 228
 ##gsnapOutput
220 229
 ##An object of class "GsnapOutput"
221 230
 ##Slot "path":
Browse code

call Rcode{} instead of code{}

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@80762 bc3139a8-67e5-0310-9ffc-ced21a209358

Dan Tenenbaum authored on 25/09/2013 20:07:03
Showing 1 changed files
... ...
@@ -282,7 +282,7 @@ scores, and read position for each allele.
282 282
 The call to \Rfunction{bam\_tally} returns an opaque pointer to a
283 283
 C-level data structure. We anticipate having multiple means of
284 284
 summarizing these data into R data structures. For now, there is one:
285
-\Rfunction{variantSummary}, which returns a \code{VRanges} object
285
+\Rfunction{variantSummary}, which returns a \Rcode{VRanges} object
286 286
 describing putative genetic variants in the sample.
287 287
 <<run_bamtally, eval=FALSE>>=
288 288
 library(gmapR)
Browse code

update to latest gstruct; brings faster bam_tally (for high coverage regions) and read-group filtering support in bam_tally

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@80611 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 20/09/2013 02:05:42
Showing 1 changed files
... ...
@@ -279,6 +279,11 @@ demonstrate the use of \software{bam\_tally} through the
279 279
 the needed information such as number of alternative alleles, quality
280 280
 scores, and read position for each allele.
281 281
 
282
+The call to \Rfunction{bam\_tally} returns an opaque pointer to a
283
+C-level data structure. We anticipate having multiple means of
284
+summarizing these data into R data structures. For now, there is one:
285
+\Rfunction{variantSummary}, which returns a \code{VRanges} object
286
+describing putative genetic variants in the sample.
282 287
 <<run_bamtally, eval=FALSE>>=
283 288
 library(gmapR)
284 289
 
... ...
@@ -288,8 +293,7 @@ bam_file <- system.file("extdata/H1993.analyzed.bam",
288 293
 breaks <- c(0L, 15L, 60L, 75L)
289 294
 bqual <- 56L
290 295
 mapq <- 13L
291
-param <- BamTallyParam(genome, read_pos_breaks = breaks,
292
-                       high_base_quality = bqual,
296
+param <- BamTallyParam(genome,
293 297
                        minimum_mapq = mapq,
294 298
                        concordant_only = FALSE, unique_only = FALSE,
295 299
                        primary_only = FALSE,
... ...
@@ -298,6 +302,7 @@ param <- BamTallyParam(genome, read_pos_breaks = breaks,
298 302
                        indels = FALSE)
299 303
 tallies <-bam_tally(bam_file,
300 304
                     param)
305
+variantSummary(tallies, high_base_quality = 23L)
301 306
 @ 
302 307
 
303 308
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Browse code

Refactor bam_tally, so that bam_tally returns a TallyIIT object, which is then summarized via summarizeVariants; this allows computing tallies once and summarizing them in different ways (like maybe get the coverage). The summarizeVariants function yields a VRanges.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@78427 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/07/2013 00:43:43
Showing 1 changed files
... ...
@@ -277,7 +277,7 @@ aligned to the TP53 region of the human genome. We'll use this file to
277 277
 demonstrate the use of \software{bam\_tally} through the
278 278
 \software{gmapR} package. The resulting data structure will contain
279 279
 the needed information such as number of alternative alleles, quality
280
-scores, and nucleotide cycle for each allele.
280
+scores, and read position for each allele.
281 281
 
282 282
 <<run_bamtally, eval=FALSE>>=
283 283
 library(gmapR)
... ...
@@ -288,7 +288,7 @@ bam_file <- system.file("extdata/H1993.analyzed.bam",
288 288
 breaks <- c(0L, 15L, 60L, 75L)
289 289
 bqual <- 56L
290 290
 mapq <- 13L
291
-param <- BamTallyParam(genome, cycle_breaks = breaks,
291
+param <- BamTallyParam(genome, read_pos_breaks = breaks,
292 292
                        high_base_quality = bqual,
293 293
                        minimum_mapq = mapq,
294 294
                        concordant_only = FALSE, unique_only = FALSE,
Browse code

dropped size of kmer from 15 down to 12 for creation of GmapGenome object in vignette for the purpose of reducing memory footprint so gmapR builds on modern Macbooks

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@71533 bc3139a8-67e5-0310-9ffc-ced21a209358

Cory Barr authored on 28/11/2012 22:29:13
Showing 1 changed files
... ...
@@ -181,7 +181,7 @@ library("gmapR")
181 181
 p53Seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19::Hsapiens, roi,
182 182
                  as.character = FALSE) 
183 183
 names(p53Seq) <- "TP53"
184
-gmapGenome <- GmapGenome(genome = p53Seq, name = "TP53_demo", create = TRUE)
184
+gmapGenome <- GmapGenome(genome = p53Seq, name = "TP53_demo", create = TRUE, k = 12L)
185 185
 @ 
186 186
 
187 187
 The data package \software{LungCancerLines} contains fastqs of reads
Browse code

*Hutch's Mac build machine was running out of memory during build. Reduced the memory footprint used by lowering the k-mer size used for creating the fly GmapGenome in the vignette.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@71493 bc3139a8-67e5-0310-9ffc-ced21a209358

Cory Barr authored on 27/11/2012 19:34:50
Showing 1 changed files
... ...
@@ -139,7 +139,8 @@ gmapGenomeDirectory <- GmapGenomeDirectory(gmapGenomePath, create = TRUE)
139 139
 gmapGenome <- GmapGenome(genome=Dmelanogaster,
140 140
                          directory = gmapGenomeDirectory,
141 141
                          name = "dm3",
142
-                         create = TRUE)
142
+                         create = TRUE,
143
+                         k = 12L)
143 144
 ##> gmapGenome
144 145
 ##GmapGenome object
145 146
 ##genome: dm3 
Browse code

An underscore character was not escaped in the gmapR vignette, fixed

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69291 bc3139a8-67e5-0310-9ffc-ced21a209358

Cory Barr authored on 10/09/2012 22:42:20
Showing 1 changed files
... ...
@@ -169,7 +169,7 @@ roi <- range(tx) + 1e6
169 169
 @ 
170 170
 
171 171
 Next we get the genetic sequence and use it to create a GmapGenome
172
-object. (Please note that the TP53_demo GmapGenome object is used by
172
+object. (Please note that the TP53\_demo GmapGenome object is used by
173 173
 many examples and tests in the gmapR and VariationTools packages. If
174 174
 the object has been created before, its subsequent creation will be
175 175
 instantaneous.)
... ...
@@ -273,7 +273,7 @@ genome <- TP53Genome()
273 273
 
274 274
 The \software{LungCancerLines} R package contains a BAM file of reads
275 275
 aligned to the TP53 region of the human genome. We'll use this file to
276
-demonstrate the use of \software{bam_tally} through the
276
+demonstrate the use of \software{bam\_tally} through the
277 277
 \software{gmapR} package. The resulting data structure will contain
278 278
 the needed information such as number of alternative alleles, quality
279 279
 scores, and nucleotide cycle for each allele.
Browse code

added 'library(gmapR)' to vignette section to function GmapGenome is found

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69290 bc3139a8-67e5-0310-9ffc-ced21a209358

Cory Barr authored on 10/09/2012 22:35:25
Showing 1 changed files
... ...
@@ -176,6 +176,7 @@ instantaneous.)
176 176
 
177 177
 <<get_TP53_seq>>=
178 178
 library("BSgenome.Hsapiens.UCSC.hg19")
179
+library("gmapR")
179 180
 p53Seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19::Hsapiens, roi,
180 181
                  as.character = FALSE) 
181 182
 names(p53Seq) <- "TP53"
Browse code

removed Hsapiens GmapGenome from gmapR vignette, dropped pasillaBamSubset from 'Suggests'

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69287 bc3139a8-67e5-0310-9ffc-ced21a209358

Cory Barr authored on 10/09/2012 20:58:54
Showing 1 changed files
... ...
@@ -130,7 +130,7 @@ if (!suppressWarnings(require(BSgenome.Dmelanogaster.UCSC.dm3))) {
130 130
   library(BSgenome.Dmelanogaster.UCSC.dm3)
131 131
 }
132 132
 
133
-gmapGenomePath <- file.path(getwd(), "testGenome")
133
+gmapGenomePath <- file.path(getwd(), "flyGenome")
134 134
 gmapGenomeDirectory <- GmapGenomeDirectory(gmapGenomePath, create = TRUE)
135 135
 ##> gmapGenomeDirectory
136 136
 ##GmapGenomeDirectory object
... ...
@@ -151,41 +151,51 @@ gmapGenome <- GmapGenome(genome=Dmelanogaster,
151 151
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
152 152
 
153 153
 The \software{GSNAP} algorithm incorporates biological knowledge to
154
-provide accurate alignments, particularly for RNA-seq data. The
155
-following example synthesizes some input reads and aligns them using
156
-\software{GSNAP}.
157
-
158
-If you have created the D. melanogaster \Rclass{GmapGenome} package as
159
-detailed above and installed it, you can begin this section as
160
-follows:
154
+provide accurate alignments, particularly for RNA-seq data. In this
155
+section, we will align reads from an RNA-seq experiment provided in a
156
+fastq file to a selected region of the human genome. In this example,
157
+we will align to the region of the human genome containing TP53 plus
158
+an additional one megabase on each side of this gene.
159
+
160
+First we need to obtain the desired region of interest from the genome:
161
+
162
+<<get_TP53_coordinates>>=
163
+library("org.Hs.eg.db")
164
+library("TxDb.Hsapiens.UCSC.hg19.knownGene")
165
+eg <- org.Hs.eg.db::org.Hs.egSYMBOL2EG[["TP53"]]
166
+txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
167
+tx <- transcripts(txdb, vals = list(gene_id = eg))
168
+roi <- range(tx) + 1e6
169
+@ 
161 170
 
162
-<<<load_GmapGenome, eval=FALSE>>=
163
-library("GmapGenome.Dmelanogaster.UCSC.dm3")
164
-gmapGenome <- GmapGenome.Dmelanogaster.UCSC.dm3
171
+Next we get the genetic sequence and use it to create a GmapGenome
172
+object. (Please note that the TP53_demo GmapGenome object is used by
173
+many examples and tests in the gmapR and VariationTools packages. If
174
+the object has been created before, its subsequent creation will be
175
+instantaneous.)
176
+
177
+<<get_TP53_seq>>=
178
+library("BSgenome.Hsapiens.UCSC.hg19")
179
+p53Seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19::Hsapiens, roi,
180
+                 as.character = FALSE) 
181
+names(p53Seq) <- "TP53"
182
+gmapGenome <- GmapGenome(genome = p53Seq, name = "TP53_demo", create = TRUE)
165 183
 @ 
166 184
 
167
-Otherwise, you can create the \Rcode{gmapGenome} object as detailed in the
168
-initial section of this vignette.
185
+The data package \software{LungCancerLines} contains fastqs of reads
186
+obtained from sequencing H1993 and H2073 cell lines. We will use these
187
+fastqs to demonstrate \software{GSNAP} alignment with \software{gmapR}.
169 188
 
170
-<<align_with_gsnap, eval=FALSE>>=
171
-##synthesize paired-end FASTA input to align against the genome
172
-library("BSgenome.Dmelanogaster.UCSC.dm3")
173
-dnaStringSet <- Dmelanogaster$chr4
174
-makeTestFasta <- function(seqStarts, pairNum) {
175
-  seqEnds <- seqStarts + 74
176
-  nucleotides <- sapply(seq_along(seqStarts),
177
-                        function(i) as.character(subseq(dnaStringSet, seqStarts[i], seqEnds[i]))
178
-                        )
179
-  fastaHeaders <- paste0(">", "fakeRead", seq_along(nucleotides), "#0/", pairNum)
180
-  fastaLines <- paste(fastaHeaders, nucleotides, sep="\n")
181
-  tmpFasta <- paste0(tempfile(), ".", pairNum, ".fa")
182
-  writeLines(fastaLines, con=tmpFasta)
183
-  tmpFasta
184
-}
185
-seqStarts <- seq(1, 100000, by = 10000) + length(dnaStringSet) * 2 / 3
186
-tmpFasta1 <- makeTestFasta(seqStarts, '1')
187
-tmpFasta2 <- makeTestFasta(seqStarts + 125, '2')
189
+<<get_lung_cancer_fastqs>>=
190
+library("LungCancerLines")
191
+fastqs <- LungCancerFastqFiles()
192
+@ 
188 193
 
194
+\software{GSNAP} is highly configurable. Users create
195
+\Rclass{GsnapParam} objects to specify desired \software{GSNAP}
196
+behavior. 
197
+
198
+<<create_gsnapParam>>=
189 199
 ##specify how GSNAP should behave using a GsnapParam object
190 200
 gsnapParam <- GsnapParam(genome = gmapGenome,
191 201
                          unique_only = FALSE,
... ...
@@ -194,10 +204,15 @@ gsnapParam <- GsnapParam(genome = gmapGenome,
194 204
                          npaths = 10,
195 205
                          novelsplicing = FALSE, splicing = NULL, 
196 206
                          nthreads = 1,
197
-                         batch = 2L)
207
+                         batch = 2L,
208
+                         gunzip=TRUE)
209
+@ 
198 210
 
199
-gsnapOutput <- gsnap(input_a=tmpFasta1,
200
-                     input_b=tmpFasta2,
211
+Now we are ready to align.
212
+
213
+<<align_with_gsnap, eval=FALSE>>=
214
+gsnapOutput <- gsnap(input_a=fastqs["H1993.first"],
215
+                     input_b=fastqs["H1993.last"],
201 216
                      params=gsnapParam)
202 217
 ##gsnapOutput
203 218
 ##An object of class "GsnapOutput"
... ...
@@ -244,29 +259,41 @@ alignments that \software{GSNAP} outputs along with other utilities.
244 259
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
245 260
 
246 261
 Running the \Rmethod{bam\_tally} method will return a \Rclass{GRanges}
247
-of information per nucleotide. Below is a basic example. See the
248
-documentation for the \Rmethod{bam\_tally} method for details.
262
+of information per nucleotide. Below is an example demonstrating how
263
+to find variants in the TP53 gene of the human genome. See the
264
+documentation for the \Rmethod{bam\_tally} method for more details.
265
+
266
+\software{gmapR} provides access to a demo genome for examples. This
267
+genome encompasses the TP53 gene along with a 1-megabase flanking
268
+region on each side.
269
+<<TP53Genome_accessor>>=
270
+genome <- TP53Genome()
271
+@ 
272
+
273
+The \software{LungCancerLines} R package contains a BAM file of reads
274
+aligned to the TP53 region of the human genome. We'll use this file to
275
+demonstrate the use of \software{bam_tally} through the
276
+\software{gmapR} package. The resulting data structure will contain
277
+the needed information such as number of alternative alleles, quality
278
+scores, and nucleotide cycle for each allele.
249 279
 
250 280
 <<run_bamtally, eval=FALSE>>=
251 281
 library(gmapR)
252
-library(GmapGenome.Hsapiens.UCSC.hg19)
253
-genome <- GmapGenome.Hsapiens.UCSC.hg19
254 282
 
255
-bam_file <- file.path(system.file(package = "gmapR", mustWork=TRUE),
256
-                      "extdata/bam_tally_test.bam")
283
+bam_file <- system.file("extdata/H1993.analyzed.bam", 
284
+                        package="LungCancerLines", mustWork=TRUE)
257 285
 
258 286
 breaks <- c(0L, 15L, 60L, 75L)
259 287
 bqual <- 56L
260 288
 mapq <- 13L
261 289
 param <- BamTallyParam(genome, cycle_breaks = breaks,
262
-                       high_quality_cutoff = bqual,
290
+                       high_base_quality = bqual,
263 291
                        minimum_mapq = mapq,
264 292
                        concordant_only = FALSE, unique_only = FALSE,
265 293
                        primary_only = FALSE,
266 294
                        min_depth = 0L, variant_strand = 1L,
267 295
                        ignore_query_Ns = TRUE,
268 296
                        indels = FALSE)
269
-
270 297
 tallies <-bam_tally(bam_file,
271 298
                     param)
272 299
 @ 
... ...
@@ -330,26 +357,28 @@ repository, etc. After installation,
330 357
 \Rcode{library("GmapGenome.Hsapiens.UCSC.hg19")} will load a
331 358
 \Rclass{GmapGenome} object that has the same name as the package.
332 359
 
333
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
334
-\section{Aligning with SNP Tolerance}
335
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
336 360
 
337
-Both \software{GSNAP} and \software{GMAP} have the ability to perform
338
-alignment without penalizing against a set of known SNPs. In this
339
-section, we work through an example of aligning with this SNP
340
-tolerance. By using SNP-tolerance, we'll perform alignments without
341
-penalizing reads that match SNPs contained in the provided SNPs. In
342
-this case, we'll use dbSNP to provide the SNPs.
343 361
 
344
-This example will use hg19. If you have not installed
345
-\Rcode{GmapGenome.Hsapiens.UCSC.hg19}, please refer to the section
346
-\ref{create_hg19_GmapGenome}.
347
-
348
-First we load the Hsapiens \Rclass{GmapGenome} object:
362
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
363
+%\section{Aligning with SNP Tolerance}
364
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
349 365
 
350
-<<<load_GmapGenomeHsapiens>>=
351
-library("GmapGenome.Hsapiens.UCSC.hg19")
352
-@ 
366
+%Both \software{GSNAP} and \software{GMAP} have the ability to perform
367
+%alignment without penalizing against a set of known SNPs. In this
368
+%section, we work through an example of aligning with this SNP
369
+%tolerance. By using SNP-tolerance, we'll perform alignments without
370
+%penalizing reads that match SNPs contained in the provided SNPs. In
371
+%this case, we'll use dbSNP to provide the SNPs.
372
+%
373
+%This example will use hg19. If you have not installed
374
+%\Rcode{GmapGenome.Hsapiens.UCSC.hg19}, please refer to the section
375
+%\ref{create_hg19_GmapGenome}.
376
+%
377
+%First we load the Hsapiens \Rclass{GmapGenome} object:
378
+%
379
+%<<<load_GmapGenomeHsapiens>>=
380
+%library("GmapGenome.Hsapiens.UCSC.hg19")
381
+%@ 
353 382
 
354 383
 <<SessionInfo>>=
355 384
 sessionInfo()
Browse code

Modify the vignette to test the new GsnapParam constructor

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69142 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 06/09/2012 12:02:36
Showing 1 changed files
... ...
@@ -155,8 +155,9 @@ provide accurate alignments, particularly for RNA-seq data. The
155 155
 following example synthesizes some input reads and aligns them using
156 156
 \software{GSNAP}.
157 157
 
158
-If you've created the D. melanogaster \Rclass{GmapGenome} package as detailed above and installed
159
-it, you can begin this section as follows:
158
+If you have created the D. melanogaster \Rclass{GmapGenome} package as
159
+detailed above and installed it, you can begin this section as
160
+follows:
160 161
 
161 162
 <<<load_GmapGenome, eval=FALSE>>=
162 163
 library("GmapGenome.Dmelanogaster.UCSC.dm3")
... ...
@@ -189,11 +190,11 @@ tmpFasta2 <- makeTestFasta(seqStarts + 125, '2')
189 190
 gsnapParam <- GsnapParam(genome = gmapGenome,
190 191
                          unique_only = FALSE,
191 192
                          max_mismatches = NULL,
192
-                         suboptimal_levels = 0L, mode = "standard",
193
-                         npaths = 10L,
193
+                         suboptimal_levels = 0, mode = "standard",
194
+                         npaths = 10,
194 195
                          novelsplicing = FALSE, splicing = NULL, 
195
-                         nthreads = 1L,
196
-                         batch = "2")
196
+                         nthreads = 1,
197
+                         batch = 2L)
197 198
 
198 199
 gsnapOutput <- gsnap(input_a=tmpFasta1,
199 200
                      input_b=tmpFasta2,
Browse code

GmapSnpDirectory replace method now accepts name arg

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69060 bc3139a8-67e5-0310-9ffc-ced21a209358

Cory Barr authored on 04/09/2012 22:50:45
Showing 1 changed files
... ...
@@ -289,7 +289,7 @@ makeGmapGenomePackage(gmapGenome=gmapGenome,
289 289
                       destDir="myDestDir",
290 290
                       license="Artistic-2.0",
291 291
                       pkgName="GmapGenome.Dmelanogaster.UCSC.dm3")
292
-@ 
292
+@
293 293
 
294 294
 After creating the package, you can run \Rcode{R CMD INSTALL
295 295
   myDestDir} to install it, or run \Rcode{R CMD build myDestDir} to
Browse code

exporting GmapSnps and GmapSnpDirectory

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68698 bc3139a8-67e5-0310-9ffc-ced21a209358

Cory Barr authored on 21/08/2012 23:52:09
Showing 1 changed files
... ...
@@ -295,6 +295,61 @@ After creating the package, you can run \Rcode{R CMD INSTALL
295 295
   myDestDir} to install it, or run \Rcode{R CMD build myDestDir} to
296 296
 create a distribution of the package.
297 297
 
298
+Many users with be working with the human genome. Many of the examples
299
+used in \software{gmapR} make use of a particular build of the human
300
+genome. As such, creating a \Rclass{GmapGenome} of hg19 is
301
+recommended. Here is one way to create it, using a \Rclass{BSgenome}
302
+object:
303
+
304
+<<create_hg19_GmapGenome, eval=FALSE>>=
305
+if (!suppressWarnings(require(BSgenome.Hsapiens.UCSC.hg19))) {
306
+  source("http://bioconductor.org/biocLite.R")
307
+  biocLite("BSgenome.Hsapiens.UCSC.hg19")
308
+  library(BSgenome.Hsapiens.UCSC.hg19)
309
+}
310
+gmapGenome <- GmapGenome(genome=Hsapiens,
311
+                         directory = "Hsapiens",
312
+                         name = "hg19",
313
+                         create = TRUE)
314
+destDir <- "HsapiensGmapGenome"
315
+pkgName <- "GmapGenome.Hsapiens.UCSC.hg19"
316
+makeGmapGenomePackage(gmapGenome=gmapGenome,
317
+                      version="1.0.0",
318
+                      maintainer="<your.name@somewhere.com>",
319
+                      author="Your Name",
320
+                      destDir=destDir,
321
+                      license="Artistic-2.0",
322
+                      pkgName="GmapGenome.Hsapiens.UCSC.hg19")
323
+@ 
324
+
325
+After running the above code, you should be able to run \Rcode{R CMD
326
+  INSTALL GmapGenome.Hsapiens.UCSC.hg19} in the appropriate directory
327
+from the command line, submit GmapGenome.Hsapiens.UCSC.hg19 to a
328
+repository, etc. After installation,
329
+\Rcode{library("GmapGenome.Hsapiens.UCSC.hg19")} will load a
330
+\Rclass{GmapGenome} object that has the same name as the package.
331
+
332
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
333
+\section{Aligning with SNP Tolerance}
334
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
335
+
336
+Both \software{GSNAP} and \software{GMAP} have the ability to perform
337
+alignment without penalizing against a set of known SNPs. In this
338
+section, we work through an example of aligning with this SNP
339
+tolerance. By using SNP-tolerance, we'll perform alignments without
340
+penalizing reads that match SNPs contained in the provided SNPs. In
341
+this case, we'll use dbSNP to provide the SNPs.
342
+
343
+This example will use hg19. If you have not installed
344
+\Rcode{GmapGenome.Hsapiens.UCSC.hg19}, please refer to the section
345
+\ref{create_hg19_GmapGenome}.
346
+
347
+First we load the Hsapiens \Rclass{GmapGenome} object:
348
+
349
+<<<load_GmapGenomeHsapiens>>=
350
+library("GmapGenome.Hsapiens.UCSC.hg19")
351
+@ 
352
+
298 353
 <<SessionInfo>>=
299 354
 sessionInfo()
300 355
 @ 
Browse code

Update vignette for changes to bam_tally/BamTallyParam

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68552 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 17/08/2012 20:20:36
Showing 1 changed files
... ...
@@ -1,3 +1,4 @@
1
+
1 2
 % \VignetteIndexEntry{gmapR}
2 3
 % \VignetteKeywords{gmap,gsnap}
3 4
 % \VignettePackage{gmapR}
... ...
@@ -256,9 +257,9 @@ bam_file <- file.path(system.file(package = "gmapR", mustWork=TRUE),
256 257
 breaks <- c(0L, 15L, 60L, 75L)
257 258
 bqual <- 56L
258 259
 mapq <- 13L
259
-param <- BamTallyParam(cycle_breaks = as.integer(breaks),
260
-                       high_quality_cutoff = as.integer(bqual),
261
-                       minimum_mapq = as.integer(mapq),
260
+param <- BamTallyParam(genome, cycle_breaks = breaks,
261
+                       high_quality_cutoff = bqual,
262
+                       minimum_mapq = mapq,
262 263
                        concordant_only = FALSE, unique_only = FALSE,
263 264
                        primary_only = FALSE,
264 265
                        min_depth = 0L, variant_strand = 1L,
... ...
@@ -266,7 +267,6 @@ param <- BamTallyParam(cycle_breaks = as.integer(breaks),
266 267
                        indels = FALSE)
267 268
 
268 269
 tallies <-bam_tally(bam_file,
269
-                    genome,
270 270
                     param)
271 271
 @ 
272 272
 
Browse code

renaming gmapR2 to gmapR: it lives again

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68172 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 02/08/2012 22:24:24
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,301 @@
1
+% \VignetteIndexEntry{gmapR}
2
+% \VignetteKeywords{gmap,gsnap}
3
+% \VignettePackage{gmapR}
4
+\documentclass[10pt]{article}
5
+
6
+\usepackage{times}
7
+\usepackage{hyperref}
8
+\usepackage{Sweave}
9
+
10
+\textwidth=6.5in
11
+\textheight=8.5in
12
+% \parskip=.3cm
13
+\oddsidemargin=-.1in
14
+\evensidemargin=-.1in
15
+\headheight=-.3in
16
+
17
+\newcommand{\Rfunction}[1]{{\texttt{#1}}}
18
+\newcommand{\Robject}[1]{{\texttt{#1}}}
19
+\newcommand{\Rpackage}[1]{{\textit{#1}}}
20
+\newcommand{\Rmethod}[1]{{\texttt{#1}}}
21
+\newcommand{\Rfunarg}[1]{{\texttt{#1}}}
22
+\newcommand{\Rclass}[1]{{\textit{#1}}}
23
+\newcommand{\Rcode}[1]{{\texttt{#1}}}
24
+
25
+\newcommand{\software}[1]{\textsf{#1}}
26
+\newcommand{\R}{\software{R}}
27
+
28
+\title{gmapR: Use the GMAP Suite of Tools in R}
29
+\author{Michael Lawrence, Cory Barr}
30
+\date{\today}
31
+
32
+\begin{document}
33
+
34
+\maketitle
35
+\tableofcontents
36
+\newpage
37
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
38
+\section{Introduction}
39
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
40
+
41
+The \Rpackage{gmapR} packages provides users with a way to access
42
+\software{GSNAP}, \software{bam\_tally}, and other utilities from the
43
+\software{GMAP} suite of tools within an R session. In this vignette,
44
+we briefly look at the \software{GMAP} suite of tools available
45
+through the \Rpackage{gmapR} package and work through an example.
46
+
47
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
48
+\section{What is \software{GMAP}, \software{GSNAP}, and \software{bam\_tally}?}
49
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
50
+
51
+The \software{GMAP} suite offers useful tools for the following:
52
+
53
+\begin{itemize}
54
+
55
+  \item Genomic mapping: Given a cDNA, find where it best aligns to
56
+    an entire genome
57
+
58
+  \item Genomic alignment: Given a cDNA and a genomic segment,
59
+    provide a nucleotide-level correspondence for the exons of the
60
+    cDNA to the genomic segment
61
+
62
+  \item Summarization via coverage plus reference and allele
63
+    nucleotide polymorphism counts for an aligned set of sequencing
64
+    reads over a given genomic location
65
+
66
+\end{itemize}
67
+
68
+\software{GMAP} (Genomic Mapping and Alignment Program) is
69
+particularly suited to relatively long mRNA and EST sequences such as
70
+those that are obtained from Roche 454 or Pacific Biosciences
71
+sequencing technologies. (At present, only \software{GSNAP} is
72
+available through the \Rpackage{gmapR}. \software{GMAP} integration is
73
+scheduled for the near future.)
74
+
75
+\software{GSNAP} (Genomic Short-read Nucleotide Alignment Program)
76
+also provides users with genomic mapping and alignment capabilities,
77
+but is optimized to handle issues that arise when dealing with the
78
+alignment of short reads generated from sequencing technologies such
79
+as those from Illumina/Solexa or ABI/SOLiD. \software{GSNAP} offers
80
+the following functionality, as mentioned in
81
+\href{http://bioinformatics.oxfordjournals.org/content/26/7/873.full}{Fast
82
+  and SNP-tolerant detection of complex variants and splicing in short
83
+  reads} by Thomas D. Wu and Serban Nacu:
84
+
85
+\begin{itemize}
86
+  
87
+  \item fast detection of complex variants and splicing in short
88
+    reads, based on a successively constrained search process of
89
+    merging and filtering position lists from a genomic index
90
+
91
+  \item alignment of both single- and paired-end reads as short as 14
92
+    nt and of arbitrarily long length
93
+    
94
+  \item detection of short- and long-distance splicing, including
95
+    interchromosomal splicing, in individual reads, using
96
+    probabilistic models or a database of known splice sites
97
+    
98
+  \item SNP-tolerant alignment to a reference space of all possible
99
+    combinations of major and minor alleles
100
+
101
+  \item alignment of reads from bisulfite-treated DNA for the study of
102
+    methylation state
103
+    
104
+\end{itemize}
105
+    
106
+\software{bam\_tally} provides users with the coverage as well as
107
+counts of both reference alleles, alternative alleles, and indels over
108
+a genomic region.
109
+
110
+For more detailed information on the \software{GMAP} suite of tools
111
+including a detailed explication of algorithmic specifics, see
112
+\url{http://research-pub.gene.com/gmap/}.
113
+
114
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
115
+\section{Create a \Rclass{GmapGenome} Object}
116
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117
+To align reads with \software{GMAP} or \software{GSNAP}, or to use
118
+\software{bam\_tally}, you will need to either obtain or create a
119
+\Rclass{GmapGenome} object. \Rclass{GmapGenome} objects can be created
120
+from FASTA files or \Rclass{BSgenome} objects, as the following
121
+example demonstrates:
122
+
123
+<<create_GmapGenome, eval=FALSE>>=
124
+library(gmapR)
125
+
126
+if (!suppressWarnings(require(BSgenome.Dmelanogaster.UCSC.dm3))) {
127
+  source("http://bioconductor.org/biocLite.R")
128
+  biocLite("BSgenome.Dmelanogaster.UCSC.dm3")
129
+  library(BSgenome.Dmelanogaster.UCSC.dm3)
130
+}
131
+
132
+gmapGenomePath <- file.path(getwd(), "testGenome")
133
+gmapGenomeDirectory <- GmapGenomeDirectory(gmapGenomePath, create = TRUE)
134
+##> gmapGenomeDirectory
135
+##GmapGenomeDirectory object
136
+##path: /reshpcfs/home/coryba/projects/gmapR2/testGenome 
137
+
138
+gmapGenome <- GmapGenome(genome=Dmelanogaster,
139
+                         directory = gmapGenomeDirectory,
140
+                         name = "dm3",
141
+                         create = TRUE)
142
+##> gmapGenome
143
+##GmapGenome object
144
+##genome: dm3 
145
+##directory: /reshpcfs/home/coryba/projects/gmapR2/testGenome 
146
+@
147
+
148
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149
+\section{Aligning with \software{GSNAP}}
150
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151
+
152
+The \software{GSNAP} algorithm incorporates biological knowledge to
153
+provide accurate alignments, particularly for RNA-seq data. The
154
+following example synthesizes some input reads and aligns them using
155
+\software{GSNAP}.
156
+
157
+If you've created the D. melanogaster \Rclass{GmapGenome} package as detailed above and installed
158
+it, you can begin this section as follows:
159
+
160
+<<<load_GmapGenome, eval=FALSE>>=
161
+library("GmapGenome.Dmelanogaster.UCSC.dm3")
162
+gmapGenome <- GmapGenome.Dmelanogaster.UCSC.dm3
163
+@ 
164
+
165
+Otherwise, you can create the \Rcode{gmapGenome} object as detailed in the
166
+initial section of this vignette.
167
+
168
+<<align_with_gsnap, eval=FALSE>>=
169
+##synthesize paired-end FASTA input to align against the genome
170
+library("BSgenome.Dmelanogaster.UCSC.dm3")
171
+dnaStringSet <- Dmelanogaster$chr4
172
+makeTestFasta <- function(seqStarts, pairNum) {
173
+  seqEnds <- seqStarts + 74
174
+  nucleotides <- sapply(seq_along(seqStarts),
175
+                        function(i) as.character(subseq(dnaStringSet, seqStarts[i], seqEnds[i]))
176
+                        )
177
+  fastaHeaders <- paste0(">", "fakeRead", seq_along(nucleotides), "#0/", pairNum)
178
+  fastaLines <- paste(fastaHeaders, nucleotides, sep="\n")
179
+  tmpFasta <- paste0(tempfile(), ".", pairNum, ".fa")
180
+  writeLines(fastaLines, con=tmpFasta)
181
+  tmpFasta
182
+}
183
+seqStarts <- seq(1, 100000, by = 10000) + length(dnaStringSet) * 2 / 3
184
+tmpFasta1 <- makeTestFasta(seqStarts, '1')
185
+tmpFasta2 <- makeTestFasta(seqStarts + 125, '2')
186
+
187
+##specify how GSNAP should behave using a GsnapParam object
188
+gsnapParam <- GsnapParam(genome = gmapGenome,
189
+                         unique_only = FALSE,
190
+                         max_mismatches = NULL,
191
+                         suboptimal_levels = 0L, mode = "standard",
192
+                         npaths = 10L,
193
+                         novelsplicing = FALSE, splicing = NULL, 
194
+                         nthreads = 1L,
195
+                         batch = "2")
196
+
197
+gsnapOutput <- gsnap(input_a=tmpFasta1,
198
+                     input_b=tmpFasta2,
199
+                     params=gsnapParam)
200
+##gsnapOutput
201
+##An object of class "GsnapOutput"
202
+##Slot "path":
203
+##[1] "/local/Rtmporwsvr"
204
+##
205
+##Slot "param":
206
+##A GsnapParams object
207
+##genome: dm3 (/reshpcfs/home/coryba/projects/gmapR2/testGenome)
208
+##part: NULL
209
+##batch: 2
210
+##max_mismatches: NULL
211
+##suboptimal_levels: 0
212
+##snps: NULL
213
+##mode: standard
214
+##nthreads: 1
215
+##novelsplicing: FALSE
216
+##splicing: NULL
217
+##npaths: 10
218
+##quiet_if_excessive: FALSE
219
+##nofails: FALSE
220
+##split_output: TRUE
221
+##extra: list()
222
+##
223
+##Slot "version":
224
+## [1] NA NA NA NA NA NA NA NA NA NA NA NA
225
+##
226
+@ 
227
+
228
+The \Rcode{gsnapOutput} object will be of the class
229
+\Rclass{GsnapOutput}. It will provide access to the BAM files of
230
+alignments that \software{GSNAP} outputs along with other utilities.
231
+
</