Browse code

general cleanup and fixes, doc updates

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@111183 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 03/12/2015 21:11:09
Showing 1 changed files
... ...
@@ -19,8 +19,9 @@
19 19
 \usage{
20 20
 \S4method{bam_tally}{BamFile}(x, param, ...)
21 21
 \S4method{bam_tally}{character}(x, param, ...)
22
-variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
23
-               keep_ref_rows = FALSE, read_length = NA_integer_)
22
+variantSummary(x, read_pos_breaks = NULL,
23
+               keep_ref_rows = FALSE, read_length = NA_integer_,
24
+               high_nm_score = NA_integer_)
24 25
 }
25 26
 
26 27
 \arguments{
... ...
@@ -30,8 +31,6 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
30 31
   \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
31 32
     for aggregating the per-read position counts. If \code{NULL}, no per-cycle
32 33
     counts are returned.}
33
-  \item{high_base_quality}{The minimum base quality for a
34
-    read to be counted as high quality.}
35 34
   \item{keep_ref_rows}{Whether to keep the rows describing only the
36 35
     reference calls, i.e., where ref and alt are the same. These are
37 36
     useful when one needs the reference counts even when there are no
... ...
@@ -39,6 +38,7 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
39 38
   \item{read_length}{The expected read length. If the read length is NA,
40 39
     the MDFNE (median distance from nearest end) statistic will NOT be
41 40
     calculated.}
41
+  \item{high_nm_score}{The cutoff at or above which a read's NM value is considered high.}
42 42
   \item{...}{Arguments that override settings in \code{param}.}
43 43
 }
44 44
 
... ...
@@ -55,15 +55,8 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
55 55
   columns are also present:
56 56
   \item{n.read.pos}{The number of unique read positions for the alt allele.}
57 57
   \item{n.read.pos.ref}{The number of unique read positions for the ref allele.}
58
-  \item{raw.count}{The number of reads with the alternate allele,
59
-    \code{NA} for the reference allele row.}
60
-  \item{raw.count.ref}{The number of reads with the reference allele.}
61 58
   \item{raw.count.total}{The total number of reads at that position,
62 59
     including reference and all alternates.}
63
-  \item{mean.quality}{The mean base quality for the alt allele,
64
-    truncated at \code{high_base_quality}.}
65
-  \item{mean.quality.ref}{The mean base quality for the ref allele,
66
-    truncated at \code{high_base_quality}.}
67 60
   \item{count.plus}{The number of positive strand reads for the alternate
68 61
     allele, \code{NA} for the reference allele row.}
69 62
   \item{count.plus.ref}{The number of positive strand reads for the reference
... ...
@@ -72,12 +65,28 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
72 65
     allele, \code{NA} for the reference allele row.}
73 66
   \item{count.minus.ref}{The number of negative strand reads for the reference
74 67
     allele.}
68
+  \item{count.del.plus}{The plus strand deletion count over the
69
+    position.}
70
+  \item{count.del.minus}{The minus strand deletion count over the
71
+    position.}
75 72
   \item{read.pos.mean}{Mean read position for the alt allele.}
76 73
   \item{read.pos.mean.ref}{Mean read position for the ref allele.}
77 74
   \item{read.pos.var}{Variance in the read positions for the alt allele.}
78 75
   \item{read.pos.var.ref}{Variance in the read positions for the ref allele.}
79 76
   \item{mdfne}{Median distance from nearest end for the alt allele.}
80 77
   \item{mdfne.ref}{Median distance from nearest end for the ref allele.}
78
+  \item{count.high.nm}{The number of alt reads with an NM value at or above the
79
+    \code{high_nm_score} cutoff.}
80
+  \item{count.high.nm.ref}{The number of ref reads with an NM value at
81
+    or above the \code{high_nm_score} cutoff.}
82
+  
83
+  If codon counting was enabled, there will be a column giving the codon
84
+  strand: \code{codon.strand}.
85
+  
86
+  If the \code{xs} parameter was \code{TRUE}, there will be four
87
+  additional columns giving the counts by aligner-determined
88
+  strand: \code{count.xs.plus}, \code{count.xs.plus.ref},
89
+  \code{count.xs.minus}, and \code{count.xs.minus.ref}.
81 90
   
82 91
   An additional column is present for each bin formed by
83 92
   the \code{read_pos_breaks} parameter, with the read count for that bin.
Browse code

re-add counting of unique read positions (convenient when dupes are not marked) and fix an uninitialized value (param.cycle_breaks) detected by valgrind

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@89842 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 02/05/2014 12:51:58
Showing 1 changed files
... ...
@@ -53,6 +53,8 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
53 53
   after quality filtering (except for indels, for which there is no
54 54
   quality filtering). The following \code{elementMetadata}
55 55
   columns are also present:
56
+  \item{n.read.pos}{The number of unique read positions for the alt allele.}
57
+  \item{n.read.pos.ref}{The number of unique read positions for the ref allele.}
56 58
   \item{raw.count}{The number of reads with the alternate allele,
57 59
     \code{NA} for the reference allele row.}
58 60
   \item{raw.count.ref}{The number of reads with the reference allele.}
Browse code

update docs to reflect that the read length is no longer guessed

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88933 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/04/2014 20:05:59
Showing 1 changed files
... ...
@@ -36,11 +36,9 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
36 36
     reference calls, i.e., where ref and alt are the same. These are
37 37
     useful when one needs the reference counts even when there are no
38 38
     alts at that position.}
39
-  \item{read_length}{The expected read length. If NA, a best guess is
40
-    made by inspecting a random sample from the BAM file. If the read
41
-    length is determined to be variable, the
42
-    MDFNE (median distance from nearest end) statistic will NOT
43
-    be calculated.}
39
+  \item{read_length}{The expected read length. If the read length is NA,
40
+    the MDFNE (median distance from nearest end) statistic will NOT be
41
+    calculated.}
44 42
   \item{...}{Arguments that override settings in \code{param}.}
45 43
 }
46 44
 
Browse code

doc updates

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88761 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 10/04/2014 19:40:59
Showing 1 changed files
... ...
@@ -20,7 +20,7 @@
20 20
 \S4method{bam_tally}{BamFile}(x, param, ...)
21 21
 \S4method{bam_tally}{character}(x, param, ...)
22 22
 variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
23
-               keep_ref_rows = FALSE)
23
+               keep_ref_rows = FALSE, read_length = NA_integer_)
24 24
 }
25 25
 
26 26
 \arguments{
... ...
@@ -36,6 +36,11 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
36 36
     reference calls, i.e., where ref and alt are the same. These are
37 37
     useful when one needs the reference counts even when there are no
38 38
     alts at that position.}
39
+  \item{read_length}{The expected read length. If NA, a best guess is
40
+    made by inspecting a random sample from the BAM file. If the read
41
+    length is determined to be variable, the
42
+    MDFNE (median distance from nearest end) statistic will NOT
43
+    be calculated.}
39 44
   \item{...}{Arguments that override settings in \code{param}.}
40 45
 }
41 46
 
Browse code

drop the unique read position counts; renamed count.pos/count.neg to count.plus/count.minus (way better names)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88417 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 04/04/2014 13:36:22
Showing 1 changed files
... ...
@@ -55,9 +55,10 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
55 55
   \item{raw.count.ref}{The number of reads with the reference allele.}
56 56
   \item{raw.count.total}{The total number of reads at that position,
57 57
     including reference and all alternates.}
58
-  \item{mean.quality}{The mean mapping quality for the alt allele.}
59
-  \item{mean.quality.ref}{The mean mapping quality for the reference
60
-    allele.}
58
+  \item{mean.quality}{The mean base quality for the alt allele,
59
+    truncated at \code{high_base_quality}.}
60
+  \item{mean.quality.ref}{The mean base quality for the ref allele,
61
+    truncated at \code{high_base_quality}.}
61 62
   \item{count.plus}{The number of positive strand reads for the alternate
62 63
     allele, \code{NA} for the reference allele row.}
63 64
   \item{count.plus.ref}{The number of positive strand reads for the reference
Browse code

bring back the unique read position count, and mean/variance of read positions

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88415 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 04/04/2014 13:16:32
Showing 1 changed files
... ...
@@ -50,27 +50,28 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
50 50
   after quality filtering (except for indels, for which there is no
51 51
   quality filtering). The following \code{elementMetadata}
52 52
   columns are also present:
53
-  \item{n.read.pos}{The number of unique cycles at which the alternate allele was
54
-    observed, \code{NA} for the reference allele row.}
55
-  \item{n.read.pos.ref}{The number of unique cycles at which the reference
56
-    allele was observed.}
57 53
   \item{raw.count}{The number of reads with the alternate allele,
58 54
     \code{NA} for the reference allele row.}
59 55
   \item{raw.count.ref}{The number of reads with the reference allele.}
60 56
   \item{raw.count.total}{The total number of reads at that position,
61 57
     including reference and all alternates.}
58
+  \item{mean.quality}{The mean mapping quality for the alt allele.}
62 59
   \item{mean.quality.ref}{The mean mapping quality for the reference
63 60
     allele.}
64
-  \item{count.pos}{The number of positive strand reads for the alternate
61
+  \item{count.plus}{The number of positive strand reads for the alternate
65 62
     allele, \code{NA} for the reference allele row.}
66
-  \item{count.pos.ref}{The number of positive strand reads for the reference
63
+  \item{count.plus.ref}{The number of positive strand reads for the reference
67 64
     allele.}
68
-  \item{count.neg}{The number of negative strand reads for the alternate
65
+  \item{count.minus}{The number of negative strand reads for the alternate
69 66
     allele, \code{NA} for the reference allele row.}
70
-  \item{count.neg.ref}{The number of negative strand reads for the reference
67
+  \item{count.minus.ref}{The number of negative strand reads for the reference
71 68
     allele.}
69
+  \item{read.pos.mean}{Mean read position for the alt allele.}
70
+  \item{read.pos.mean.ref}{Mean read position for the ref allele.}
72 71
   \item{read.pos.var}{Variance in the read positions for the alt allele.}
73 72
   \item{read.pos.var.ref}{Variance in the read positions for the ref allele.}
73
+  \item{mdfne}{Median distance from nearest end for the alt allele.}
74
+  \item{mdfne.ref}{Median distance from nearest end for the ref allele.}
74 75
   
75 76
   An additional column is present for each bin formed by
76 77
   the \code{read_pos_breaks} parameter, with the read count for that bin.
Browse code

doc updates/fixes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@81628 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/10/2013 18:52:22
Showing 1 changed files
... ...
@@ -4,7 +4,9 @@
4 4
 \alias{bam_tally,BamFile-method}
5 5
 \alias{bam_tally,character-method}
6 6
 \alias{bam_tally,GmapBamReader-method}
7
+\alias{genome,TallyIIT-method}
7 8
 \alias{bam_tally}
9
+\alias{variantSummary}
8 10
 
9 11
 \title{Per-position Alignment Summaries}
10 12
 
... ...
@@ -17,41 +19,46 @@
17 19
 \usage{
18 20
 \S4method{bam_tally}{BamFile}(x, param, ...)
19 21
 \S4method{bam_tally}{character}(x, param, ...)
22
+variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
23
+               keep_ref_rows = FALSE)
20 24
 }
21 25
 
22 26
 \arguments{
23 27
   \item{x}{a \code{BamFile} object or string path to a BAM file to read}
24 28
   \item{param}{The \code{\linkS4class{BamTallyParam}} object with
25 29
     parameters for the tally operation. }
30
+  \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
31
+    for aggregating the per-read position counts. If \code{NULL}, no per-cycle
32
+    counts are returned.}
33
+  \item{high_base_quality}{The minimum base quality for a
34
+    read to be counted as high quality.}
35
+  \item{keep_ref_rows}{Whether to keep the rows describing only the
36
+    reference calls, i.e., where ref and alt are the same. These are
37
+    useful when one needs the reference counts even when there are no
38
+    alts at that position.}
26 39
   \item{...}{Arguments that override settings in \code{param}.}
27 40
 }
28 41
 
29 42
 \value{
30
-  A \code{\link[GenomicRanges]{GRanges}}, with a range for each position
31
-  that passed the filters, and with the following \code{elementMetadata}
32
-  columns:
33
-  \item{location}{A string representation of the location, of the form
34
-    \dQuote{chr:pos}. This makes it easy, e.g., to check for the
35
-    presence of a variant in another result object.}
36
-  \item{ref}{The reference base at that position.}
37
-  \item{alt}{The base for the alternate allele, \code{NA} for the
38
-    reference allele row.}
43
+  The \code{bam_tally} function returns an opaque pointer to a C-level
44
+  data structure with the class \dQuote{TallyIIT}. Currently, the only
45
+  operation applicable to this object is \code{variantSummary}.
46
+  
47
+  The \code{variantSummary} function returns
48
+  a \code{\link[VariantAnnotation]{VRanges}}, with a range for each position
49
+  that passed the filters. The depth columns correspond to the counts
50
+  after quality filtering (except for indels, for which there is no
51
+  quality filtering). The following \code{elementMetadata}
52
+  columns are also present:
39 53
   \item{n.read.pos}{The number of unique cycles at which the alternate allele was
40 54
     observed, \code{NA} for the reference allele row.}
41 55
   \item{n.read.pos.ref}{The number of unique cycles at which the reference
42 56
     allele was observed.}
43
-  \item{count}{The number of reads with the alternate allele,
57
+  \item{raw.count}{The number of reads with the alternate allele,
44 58
     \code{NA} for the reference allele row.}
45
-  \item{count.ref}{The number of reads with the reference allele.}
46
-  \item{count.total}{The total number of reads at that position,
59
+  \item{raw.count.ref}{The number of reads with the reference allele.}
60
+  \item{raw.count.total}{The total number of reads at that position,
47 61
     including reference and all alternates.}
48
-  \item{high.quality}{The number of reads for the alternate allele that were
49
-    above \code{high_quality_cutoff}, \code{NA} for the reference allele
50
-    row.}
51
-  \item{high.quality.ref}{The number of reads for the reference allele that were
52
-    above \code{high_quality_cutoff}.}
53
-  \item{mean.quality}{The mean mapping quality for the alternate allele,
54
-    \code{NA} for the reference allele row.}
55 62
   \item{mean.quality.ref}{The mean mapping quality for the reference
56 63
     allele.}
57 64
   \item{count.pos}{The number of positive strand reads for the alternate
... ...
@@ -62,9 +69,14 @@
62 69
     allele, \code{NA} for the reference allele row.}
63 70
   \item{count.neg.ref}{The number of negative strand reads for the reference
64 71
     allele.}
65
-
72
+  \item{read.pos.var}{Variance in the read positions for the alt allele.}
73
+  \item{read.pos.var.ref}{Variance in the read positions for the ref allele.}
74
+  
66 75
   An additional column is present for each bin formed by
67 76
   the \code{read_pos_breaks} parameter, with the read count for that bin.
68 77
 }
69 78
 
79
+\seealso{\code{tallyVariants} in the VariantTools package provides a
80
+  high-level wrapper for this functionality.}
81
+
70 82
 \author{Michael Lawrence}
Browse code

Refactor bam_tally, so that bam_tally returns a TallyIIT object, which is then summarized via summarizeVariants; this allows computing tallies once and summarizing them in different ways (like maybe get the coverage). The summarizeVariants function yields a VRanges.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@78427 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/07/2013 00:43:43
Showing 1 changed files
... ...
@@ -36,9 +36,9 @@
36 36
   \item{ref}{The reference base at that position.}
37 37
   \item{alt}{The base for the alternate allele, \code{NA} for the
38 38
     reference allele row.}
39
-  \item{ncycles}{The number of unique cycles at which the alternate allele was
39
+  \item{n.read.pos}{The number of unique cycles at which the alternate allele was
40 40
     observed, \code{NA} for the reference allele row.}
41
-  \item{ncycles.ref}{The number of unique cycles at which the reference
41
+  \item{n.read.pos.ref}{The number of unique cycles at which the reference
42 42
     allele was observed.}
43 43
   \item{count}{The number of reads with the alternate allele,
44 44
     \code{NA} for the reference allele row.}
... ...
@@ -64,7 +64,7 @@
64 64
     allele.}
65 65
 
66 66
   An additional column is present for each bin formed by
67
-  the \code{cycle_breaks} parameter, with the read count for that bin.
67
+  the \code{read_pos_breaks} parameter, with the read count for that bin.
68 68
 }
69 69
 
70 70
 \author{Michael Lawrence}
Browse code

Make 'genome' a BamTallyParam slot, instead of a bam_tally argument. This is consistent with GsnapParam. Also, rename 'high_quality_cutoff' to 'high_base_quality'.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68551 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 17/08/2012 20:19:50
Showing 1 changed files
... ...
@@ -6,8 +6,8 @@
6 6
 \alias{bam_tally,GmapBamReader-method}
7 7
 \alias{bam_tally}
8 8
 
9
-\title{Create a Summarization of the Read Sequences and Qualities for Each
10
-  Genomic Position}
9
+\title{Per-position Alignment Summaries}
10
+
11 11
 \description{
12 12
   Given a set of alignments, for each position in the genome output
13 13
   counts for the reference allele and all alternate alleles. Often used
... ...
@@ -15,15 +15,13 @@
15 15
 }
16 16
 
17 17
 \usage{
18
-\S4method{bam_tally}{BamFile}(x, genome, param = BamTallyParam(), ...)
19
-\S4method{bam_tally}{character}(x, genome, param = BamTallyParam(), ...)
18
+\S4method{bam_tally}{BamFile}(x, param, ...)
19
+\S4method{bam_tally}{character}(x, param, ...)
20 20
 }
21 21
 
22 22
 \arguments{
23 23
   \item{x}{a \code{BamFile} object or string path to a BAM file to read}
24
-  \item{genome}{the \code{GmapGenome} object corresponding to the
25
-    alignments in the BAM file}
26
-  \item{param}{The \code{\linkS4class{BamTallyParam}} object with extra
24
+  \item{param}{The \code{\linkS4class{BamTallyParam}} object with
27 25
     parameters for the tally operation. }
28 26
   \item{...}{Arguments that override settings in \code{param}.}
29 27
 }
Browse code

renaming gmapR2 to gmapR: it lives again

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68172 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 02/08/2012 22:24:24
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,72 @@
1
+\name{bam_tally-methods}
2
+\docType{methods}
3
+\alias{bam_tally-methods}
4
+\alias{bam_tally,BamFile-method}
5
+\alias{bam_tally,character-method}
6
+\alias{bam_tally,GmapBamReader-method}
7
+\alias{bam_tally}
8
+
9
+\title{Create a Summarization of the Read Sequences and Qualities for Each
10
+  Genomic Position}
11
+\description{
12
+  Given a set of alignments, for each position in the genome output
13
+  counts for the reference allele and all alternate alleles. Often used
14
+  as a precursor to detecting variants. Indels will be supported soon.
15
+}
16
+
17
+\usage{
18
+\S4method{bam_tally}{BamFile}(x, genome, param = BamTallyParam(), ...)
19
+\S4method{bam_tally}{character}(x, genome, param = BamTallyParam(), ...)
20
+}
21
+
22
+\arguments{
23
+  \item{x}{a \code{BamFile} object or string path to a BAM file to read}
24
+  \item{genome}{the \code{GmapGenome} object corresponding to the
25
+    alignments in the BAM file}
26
+  \item{param}{The \code{\linkS4class{BamTallyParam}} object with extra
27
+    parameters for the tally operation. }
28
+  \item{...}{Arguments that override settings in \code{param}.}
29
+}
30
+
31
+\value{
32
+  A \code{\link[GenomicRanges]{GRanges}}, with a range for each position
33
+  that passed the filters, and with the following \code{elementMetadata}
34
+  columns:
35
+  \item{location}{A string representation of the location, of the form
36
+    \dQuote{chr:pos}. This makes it easy, e.g., to check for the
37
+    presence of a variant in another result object.}
38
+  \item{ref}{The reference base at that position.}
39
+  \item{alt}{The base for the alternate allele, \code{NA} for the
40
+    reference allele row.}
41
+  \item{ncycles}{The number of unique cycles at which the alternate allele was
42
+    observed, \code{NA} for the reference allele row.}
43
+  \item{ncycles.ref}{The number of unique cycles at which the reference
44
+    allele was observed.}
45
+  \item{count}{The number of reads with the alternate allele,
46
+    \code{NA} for the reference allele row.}
47
+  \item{count.ref}{The number of reads with the reference allele.}
48
+  \item{count.total}{The total number of reads at that position,
49
+    including reference and all alternates.}
50
+  \item{high.quality}{The number of reads for the alternate allele that were
51
+    above \code{high_quality_cutoff}, \code{NA} for the reference allele
52
+    row.}
53
+  \item{high.quality.ref}{The number of reads for the reference allele that were
54
+    above \code{high_quality_cutoff}.}
55
+  \item{mean.quality}{The mean mapping quality for the alternate allele,
56
+    \code{NA} for the reference allele row.}
57
+  \item{mean.quality.ref}{The mean mapping quality for the reference
58
+    allele.}
59
+  \item{count.pos}{The number of positive strand reads for the alternate
60
+    allele, \code{NA} for the reference allele row.}
61
+  \item{count.pos.ref}{The number of positive strand reads for the reference
62
+    allele.}
63
+  \item{count.neg}{The number of negative strand reads for the alternate
64
+    allele, \code{NA} for the reference allele row.}
65
+  \item{count.neg.ref}{The number of negative strand reads for the reference
66
+    allele.}
67
+
68
+  An additional column is present for each bin formed by
69
+  the \code{cycle_breaks} parameter, with the read count for that bin.
70
+}
71
+
72
+\author{Michael Lawrence}