Browse code

doc updates/fixes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@81628 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/10/2013 18:52:22
Showing 3 changed files

... ...
@@ -14,16 +14,14 @@
14 14
 }
15 15
 
16 16
 \usage{
17
-  BamTallyParam(genome, which = RangesList(),
17
+  BamTallyParam(genome, which = GRanges(),
18 18
                 desired_read_group = NULL,
19
-                read_pos_breaks = NULL,
20
-                high_base_quality = 0L,
21 19
                 minimum_mapq = 0L,
22 20
                 concordant_only = FALSE, unique_only = FALSE,
23 21
                 primary_only = FALSE, ignore_duplicates = FALSE,
24 22
                 min_depth = 0L, variant_strand = 0L,
25 23
                 ignore_query_Ns = FALSE,
26
-                indels = TRUE)
24
+                indels = FALSE)
27 25
 }
28 26
 \arguments{
29 27
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -33,11 +31,6 @@
33 31
   }
34 32
   \item{desired_read_group}{The name of the read group to which to limit
35 33
     the tallying; if not NULL, must be a single, non-NA string.}
36
-  \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
37
-    for aggregating the per-read position counts. If \code{NULL}, no per-cycle
38
-    counts are returned.}
39
-  \item{high_base_quality}{The minimum mapping quality for a
40
-    read to be counted as high quality.}
41 34
   \item{minimum_mapq}{Minimum mapping quality for a read to be counted
42 35
     at all.}
43 36
   \item{concordant_only}{Consider only what gnsap
... ...
@@ -12,54 +12,73 @@
12 12
 }
13 13
 
14 14
 \usage{
15
-GsnapParam(genome, unique_only = FALSE,
16
-   max_mismatches = NULL, suboptimal_levels = 0L,
17
-   mode = "standard", snps = NULL,
18
-   npaths = if (unique_only) 1L else 100L,
19
-   quiet_if_excessive = unique_only, nofails = unique_only,
20
-   split_output = !unique_only,
21
-   novelsplicing = FALSE, splicing = NULL, 
22
-   nthreads = 1L, part = NULL, batch = "2", ...)
15
+GsnapParam(genome, unique_only = FALSE, molecule = c("RNA", "DNA"),
16
+           max_mismatches = NULL,
17
+           suboptimal_levels = 0L, mode = "standard",
18
+           snps = NULL,
19
+           npaths = if (unique_only) 1L else 100L,
20
+           quiet_if_excessive = unique_only, nofails = unique_only,
21
+           split_output = !unique_only,
22
+           novelsplicing = FALSE, splicing = NULL, 
23
+           nthreads = 1L, part = NULL, batch = "2",
24
+           terminal_threshold = if (molecule == "DNA") 1000L else 2L,
25
+           gmap_mode = if (molecule == "DNA") "none" else
26
+                       "pairsearch,terminal,improve", ...)
23 27
 }
24 28
 
25 29
 \arguments{
26
-\item{genome}{A GmapGenome object to align against}
27
-\item{unique_only}{Whether only alignments with a unique match should be
28
-  output. The default is FALSE.}
29
-\item{max_mismatches}{The maximum number of mismatches to allow per
30
-  alignment. If NULL, then the value defaults to ((readlength + 2) / 12 - 2))}
31
-\item{suboptimal_levels}{Report suboptimal hits beyond best hit. The
32
-  default is 0L.}
33
-\item{mode}{The alignment mode. It can be "standard", "cmet-stranded",
34
-  "cmet-nonstranded", "atoi-stranded", or "atoi-nonstranded". The default 
35
-  is "standard".}
36
-\item{snps}{If not NULL, then a GmapSnps object. Provided SNPs will not
37
-  count as mismatches.}
38
-\item{npaths}{The maximum number of paths to print.}
39
-\item{quiet_if_excessive}{If more than maximum number of paths are
40
-  found, then no alignment from the read will be in the output.}
41
-\item{nofails}{Exclude failed alignments from output}
42
-\item{split_output}{Basename for multiple-file output, separately for nomapping,
43
-  halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq,
44
-  paired_mult, concordant_uniq, and concordant_mult results (up to 9
45
-  files, or 10 if --fails-as-input is selected, or 3 for single-end reads)}
46
-\item{novelsplicing}{Logical indicating whether to look for novel
47
-  splicing events. FALSE is the default.}
48
-\item{splicing}{If not NULL, a GmapSplices object. NULL is the default.}
49
-\item{nthreads}{The number of worker threads gsnap should use to align.}
50
-\item{part}{If not NULL, then process only the i-th out of every n
51
-  sequences e.g., 0/100 or 99/100 (useful for distributing jobs to a
52
-  computer farm). If NULL, then all sequences are processed. NULL is the default.}
53
-\item{batch}{This argument allows control over gsnap's memory mapping
54
-  and allocation. The default is mode 2.
55
-  Mode 0: \{offsets=allocate, positions=mmap, genome=mmap\},
56
-  Mode 1: \{offsets=allocate, positions=mmap & preload,genome=mmap\},
57
-  Mode 2: \{offsets=allocate, positions=mmap & preload,genome=mmap &
58
-  preload\}, 
59
-  Mode 3: \{offsets=allocate, positions=allocate,genome=mmap & preload\},
60
-  Mode 4: \{offsets=allocate, positions=allocate,genome=allocate\}}
30
+  \item{genome}{A GmapGenome object to align against}
31
+  \item{unique_only}{Whether only alignments with a unique match should be
32
+    output. The default is FALSE.}
33
+  \item{molecule}{The type of molecule sequenced; used to determine
34
+    appropriate parameter defaults.}
35
+  \item{max_mismatches}{The maximum number of mismatches to allow per
36
+    alignment. If NULL, then the value defaults to ((readlength + 2) / 12 - 2))}
37
+  \item{suboptimal_levels}{Report suboptimal hits beyond best hit. The
38
+    default is 0L.}
39
+  \item{mode}{The alignment mode. It can be "standard", "cmet-stranded",
40
+    "cmet-nonstranded", "atoi-stranded", or "atoi-nonstranded". The default 
41
+    is "standard".}
42
+  \item{snps}{If not NULL, then a GmapSnps object. Provided SNPs will not
43
+    count as mismatches.}
44
+  \item{npaths}{The maximum number of paths to print.}
45
+  \item{quiet_if_excessive}{If more than maximum number of paths are
46
+    found, then no alignment from the read will be in the output.}
47
+  \item{nofails}{Exclude failed alignments from output}
48
+  \item{split_output}{Basename for multiple-file output, separately for
49
+    nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq,
50
+    unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and
51
+    concordant_mult results (up to 9 files, or 10 if --fails-as-input is
52
+    selected, or 3 for single-end reads)}
53
+  \item{novelsplicing}{Logical indicating whether to look for novel
54
+    splicing events. FALSE is the default.}
55
+  \item{splicing}{If not NULL, a GmapSplices object. NULL is the default.}
56
+  \item{nthreads}{The number of worker threads gsnap should use to align.}
57
+  \item{part}{If not NULL, then process only the i-th out of every n
58
+    sequences e.g., 0/100 or 99/100 (useful for distributing jobs to a
59
+    computer farm). If NULL, then all sequences are processed. NULL is
60
+    the default.}
61
+  \item{batch}{This argument allows control over gsnap's memory mapping
62
+    and allocation. The default is mode 2.
63
+    Mode 0: \{offsets=allocate, positions=mmap, genome=mmap\},
64
+    Mode 1: \{offsets=allocate, positions=mmap & preload,genome=mmap\},
65
+    Mode 2: \{offsets=allocate, positions=mmap & preload,genome=mmap &
66
+    preload\}, 
67
+    Mode 3: \{offsets=allocate, positions=allocate,genome=mmap & preload\},
68
+    Mode 4: \{offsets=allocate, positions=allocate,genome=allocate\}}
61 69
   \item{...}{Additional parameters for gsnap. See gsnap's full
62
-  documentation for those available.}
70
+    documentation for those available.}
71
+  \item{terminal_threshold}{If this number of mismatches is exceeded,
72
+    GSNAP will attempt to align from one of the sequence, eventually
73
+    giving up and discarding the rest of the sequence. This is called a
74
+    \dQuote{terminal alignment}. By setting this to a high value, we
75
+    have effectively disabled it for DNA, since terminal alignments were
76
+    motivated by splicing alignment problems and other special cases.
77
+  }
78
+  \item{gmap_mode}{Specifies the GMAP pipeline executed when GSNAP
79
+    delegates to GMAP (a Smith-Waterman aligner) in difficult cases. We
80
+    have disabled this for DNA, since such difficult cases are only
81
+    anticipated in the context of splicing or complex rearrangements.}
63 82
 }
64 83
 
65 84
 \seealso{
... ...
@@ -4,7 +4,9 @@
4 4
 \alias{bam_tally,BamFile-method}
5 5
 \alias{bam_tally,character-method}
6 6
 \alias{bam_tally,GmapBamReader-method}
7
+\alias{genome,TallyIIT-method}
7 8
 \alias{bam_tally}
9
+\alias{variantSummary}
8 10
 
9 11
 \title{Per-position Alignment Summaries}
10 12
 
... ...
@@ -17,41 +19,46 @@
17 19
 \usage{
18 20
 \S4method{bam_tally}{BamFile}(x, param, ...)
19 21
 \S4method{bam_tally}{character}(x, param, ...)
22
+variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
23
+               keep_ref_rows = FALSE)
20 24
 }
21 25
 
22 26
 \arguments{
23 27
   \item{x}{a \code{BamFile} object or string path to a BAM file to read}
24 28
   \item{param}{The \code{\linkS4class{BamTallyParam}} object with
25 29
     parameters for the tally operation. }
30
+  \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
31
+    for aggregating the per-read position counts. If \code{NULL}, no per-cycle
32
+    counts are returned.}
33
+  \item{high_base_quality}{The minimum mapping quality for a
34
+    read to be counted as high quality.}
35
+  \item{keep_ref_rows}{Whether to keep the rows describing only the
36
+    reference calls, i.e., where ref and alt are the same. These are
37
+    useful when one needs the reference counts even when there are no
38
+    alts at that position.}
26 39
   \item{...}{Arguments that override settings in \code{param}.}
27 40
 }
28 41
 
29 42
 \value{
30
-  A \code{\link[GenomicRanges]{GRanges}}, with a range for each position
31
-  that passed the filters, and with the following \code{elementMetadata}
32
-  columns:
33
-  \item{location}{A string representation of the location, of the form
34
-    \dQuote{chr:pos}. This makes it easy, e.g., to check for the
35
-    presence of a variant in another result object.}
36
-  \item{ref}{The reference base at that position.}
37
-  \item{alt}{The base for the alternate allele, \code{NA} for the
38
-    reference allele row.}
43
+  The \code{bam_tally} function returns an opaque pointer to a C-level
44
+  data structure with the class \dQuote{TallyIIT}. Currently, the only
45
+  operation applicable to this object is \code{variantSummary}.
46
+  
47
+  The \code{variantSummary} function returns
48
+  a \code{\link[VariantAnnotation]{VRanges}}, with a range for each position
49
+  that passed the filters. The depth columns correspond to the counts
50
+  after quality filtering (except for indels, for which there is no
51
+  quality filtering). The following \code{elementMetadata}
52
+  columns are also present:
39 53
   \item{n.read.pos}{The number of unique cycles at which the alternate allele was
40 54
     observed, \code{NA} for the reference allele row.}
41 55
   \item{n.read.pos.ref}{The number of unique cycles at which the reference
42 56
     allele was observed.}
43
-  \item{count}{The number of reads with the alternate allele,
57
+  \item{raw.count}{The number of reads with the alternate allele,
44 58
     \code{NA} for the reference allele row.}
45
-  \item{count.ref}{The number of reads with the reference allele.}
46
-  \item{count.total}{The total number of reads at that position,
59
+  \item{raw.count.ref}{The number of reads with the reference allele.}
60
+  \item{raw.count.total}{The total number of reads at that position,
47 61
     including reference and all alternates.}
48
-  \item{high.quality}{The number of reads for the alternate allele that were
49
-    above \code{high_quality_cutoff}, \code{NA} for the reference allele
50
-    row.}
51
-  \item{high.quality.ref}{The number of reads for the reference allele that were
52
-    above \code{high_quality_cutoff}.}
53
-  \item{mean.quality}{The mean mapping quality for the alternate allele,
54
-    \code{NA} for the reference allele row.}
55 62
   \item{mean.quality.ref}{The mean mapping quality for the reference
56 63
     allele.}
57 64
   \item{count.pos}{The number of positive strand reads for the alternate
... ...
@@ -62,9 +69,14 @@
62 69
     allele, \code{NA} for the reference allele row.}
63 70
   \item{count.neg.ref}{The number of negative strand reads for the reference
64 71
     allele.}
65
-
72
+  \item{read.pos.var}{Variance in the read positions for the alt allele.}
73
+  \item{read.pos.var.ref}{Variance in the read positions for the ref allele.}
74
+  
66 75
   An additional column is present for each bin formed by
67 76
   the \code{read_pos_breaks} parameter, with the read count for that bin.
68 77
 }
69 78
 
79
+\seealso{\code{tallyVariants} in the VariantTools package provides a
80
+  high-level wrapper for this functionality.}
81
+
70 82
 \author{Michael Lawrence}