Browse code

support new version of bam_tally

Michael Lawrence authored on 12/06/2019 22:48:54
Showing 1 changed files
... ...
@@ -19,9 +19,9 @@
19 19
                 minimum_mapq = 0L,
20 20
                 concordant_only = FALSE, unique_only = FALSE,
21 21
                 primary_only = FALSE, ignore_duplicates = FALSE,
22
-                min_depth = 0L, variant_strand = 0L,
22
+                min_depth = 0L, variant_strand = 0L, variant_pct = 0,
23 23
                 ignore_query_Ns = FALSE,
24
-                indels = FALSE, include_soft_clips = 0L,
24
+                indels = FALSE, min_softclip = 0L, max_softclip = 0L,
25 25
                 exon_iit = NULL, IIT_BPPARAM = NULL,
26 26
                 xs = FALSE, read_pos = FALSE,
27 27
                 min_base_quality = 0L, noncovered = FALSE, nm = FALSE)
... ...
@@ -52,6 +52,8 @@
52 52
     only positions where a variant was seen on at least one strand, and
53 53
     2 requires the variant be seen on both strands. Setting this to 1
54 54
     is a good way to save resources.}
55
+  \item{variant_pct}{The minimum alternate allele fraction
56
+    for a variant to be reported for a strand.} 
55 57
   \item{ignore_query_Ns}{Whether to ignore the N base pairs when
56 58
     counting. Can save a lot of resources when processing low quality data.}
57 59
   \item{indels}{Whether to return indel counts. The \code{ref} and
... ...
@@ -60,7 +62,8 @@
60 62
     always spans the sequence in \code{ref}; so e.g. a deletion extends
61 63
     one nt upstream of the actual deleted sequence.
62 64
   }
63
-  \item{include_soft_clips}{Maximum length of soft clips that are
65
+  \item{min_softclip, max_softclip}{
66
+    Minimum and maximum length of soft clips that are
64 67
     considered for counting. Soft-clipping is often useful (for GSNAP at
65 68
     least) during alignment, and it should be preserved in the
66 69
     output. However, soft clipping can preferentially occur in regions
Browse code

follow renaming of RangesList class -> IntegerRangesList in IRanges 2.13.12

Hervé Pagès authored on 22/01/2018 00:24:43
Showing 1 changed files
... ...
@@ -28,7 +28,7 @@
28 28
 }
29 29
 \arguments{
30 30
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
31
-  \item{which}{A \code{RangesList} or something coercible to
31
+  \item{which}{An \code{IntegerRangesList} or something coercible to
32 32
     one that limits the tally to that range or set of ranges. By
33 33
     default, the entire genome is processed.
34 34
   }
Browse code

general cleanup and fixes, doc updates

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@111183 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 03/12/2015 21:11:09
Showing 1 changed files
... ...
@@ -22,7 +22,9 @@
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24 24
                 indels = FALSE, include_soft_clips = 0L,
25
-                exon_iit = NULL, IIT_BPPARAM = NULL)
25
+                exon_iit = NULL, IIT_BPPARAM = NULL,
26
+                xs = FALSE, read_pos = FALSE,
27
+                min_base_quality = 0L, noncovered = FALSE, nm = FALSE)
26 28
 }
27 29
 \arguments{
28 30
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -74,6 +76,20 @@
74 76
     generating the iit file from an R object. Ignored if \code{exon_iit}
75 77
     is a character vector or \code{NULL}.
76 78
   }
79
+  \item{xs}{Whether to tabulate reads by XS tag, the aligner's best
80
+    guess about the strand of transcription.
81
+  }
82
+  \item{read_pos}{Whether to tabulate by read position.
83
+  }
84
+  \item{min_base_quality}{Minimum base quality cutoff. Calls of lower
85
+    quality are not counted, except in the total raw depth.
86
+  }
87
+  \item{noncovered}{
88
+    Whether to report zero tallies, where there is no coverage.
89
+  }
90
+  \item{nm}{
91
+    Whether to tally by NM tag, the number of mismatches for a read.
92
+  }
77 93
 }
78 94
 \seealso{
79 95
   \code{\link{bam_tally}}
Browse code

fix naming of exon_iit argument

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@95338 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 11/10/2014 20:33:34
Showing 1 changed files
... ...
@@ -22,7 +22,7 @@
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24 24
                 indels = FALSE, include_soft_clips = 0L,
25
-                cds_iit = NULL, IIT_BPPARAM = NULL)
25
+                exon_iit = NULL, IIT_BPPARAM = NULL)
26 26
 }
27 27
 \arguments{
28 28
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
Browse code

add codon tally support. No vbump yet

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@93316 bc3139a8-67e5-0310-9ffc-ced21a209358

Gabriel Becker authored on 11/08/2014 23:42:48
Showing 1 changed files
... ...
@@ -22,7 +22,7 @@
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24 24
                 indels = FALSE, include_soft_clips = 0L,
25
-                count_xs = FALSE, noncovered = FALSE)
25
+                cds_iit = NULL, IIT_BPPARAM = NULL)
26 26
 }
27 27
 \arguments{
28 28
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -65,15 +65,16 @@
65 65
     of discordance with the reference, and if those clipped regions are
66 66
     ignored during counting, the allele fraction is misestimated.
67 67
   }
68
-  \item{count_xs}{Whether to tabulate the +/- values of the XS tag,
69
-    often output by RNA-seq aligners to indicate the strand of
70
-    transcription.
71
-  }
72
-  \item{noncovered}{Whether to output rows for positions without any
73
-    coverage.
68
+  \item{exon_iit}{An object which indicates the exons to be used for
69
+    tallying codons (a character value indicating an existing .iit file, a
70
+    \code{GRangesList} of exons by gene or a \code{TxDb} object from which
71
+    to make such a \code{GRangesList}) or \code{NULL} indicating no
72
+    codon-level tallying should be done.}
73
+  \item{IIT_BPPARAM}{A \code{BiocParallelParam} object to use when
74
+    generating the iit file from an R object. Ignored if \code{exon_iit}
75
+    is a character vector or \code{NULL}.
74 76
   }
75 77
 }
76
-
77 78
 \seealso{
78 79
   \code{\link{bam_tally}}
79 80
 }
Browse code

update to new bam_tally with support for XS counting, which we now support via BamTallyParam@count_xs.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@90225 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 13/05/2014 02:04:05
Showing 1 changed files
... ...
@@ -21,7 +21,8 @@
21 21
                 primary_only = FALSE, ignore_duplicates = FALSE,
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24
-                indels = FALSE, include_soft_clips = 0L)
24
+                indels = FALSE, include_soft_clips = 0L,
25
+                count_xs = FALSE, noncovered = FALSE)
25 26
 }
26 27
 \arguments{
27 28
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -64,6 +65,13 @@
64 65
     of discordance with the reference, and if those clipped regions are
65 66
     ignored during counting, the allele fraction is misestimated.
66 67
   }
68
+  \item{count_xs}{Whether to tabulate the +/- values of the XS tag,
69
+    often output by RNA-seq aligners to indicate the strand of
70
+    transcription.
71
+  }
72
+  \item{noncovered}{Whether to output rows for positions without any
73
+    coverage.
74
+  }
67 75
 }
68 76
 
69 77
 \seealso{
Browse code

update GSTRUCT (bam_tally); add include_soft_clip parameter for counting over soft clips of a given max length (more accurate allele frequency)

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@88343 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 02/04/2014 22:03:40
Showing 1 changed files
... ...
@@ -21,7 +21,7 @@
21 21
                 primary_only = FALSE, ignore_duplicates = FALSE,
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24
-                indels = FALSE)
24
+                indels = FALSE, include_soft_clips = 0L)
25 25
 }
26 26
 \arguments{
27 27
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -57,6 +57,13 @@
57 57
     always spans the sequence in \code{ref}; so e.g. a deletion extends
58 58
     one nt upstream of the actual deleted sequence.
59 59
   }
60
+  \item{include_soft_clips}{Maximum length of soft clips that are
61
+    considered for counting. Soft-clipping is often useful (for GSNAP at
62
+    least) during alignment, and it should be preserved in the
63
+    output. However, soft clipping can preferentially occur in regions
64
+    of discordance with the reference, and if those clipped regions are
65
+    ignored during counting, the allele fraction is misestimated.
66
+  }
60 67
 }
61 68
 
62 69
 \seealso{
Browse code

doc updates/fixes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@81628 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/10/2013 18:52:22
Showing 1 changed files
... ...
@@ -14,16 +14,14 @@
14 14
 }
15 15
 
16 16
 \usage{
17
-  BamTallyParam(genome, which = RangesList(),
17
+  BamTallyParam(genome, which = GRanges(),
18 18
                 desired_read_group = NULL,
19
-                read_pos_breaks = NULL,
20
-                high_base_quality = 0L,
21 19
                 minimum_mapq = 0L,
22 20
                 concordant_only = FALSE, unique_only = FALSE,
23 21
                 primary_only = FALSE, ignore_duplicates = FALSE,
24 22
                 min_depth = 0L, variant_strand = 0L,
25 23
                 ignore_query_Ns = FALSE,
26
-                indels = TRUE)
24
+                indels = FALSE)
27 25
 }
28 26
 \arguments{
29 27
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -33,11 +31,6 @@
33 31
   }
34 32
   \item{desired_read_group}{The name of the read group to which to limit
35 33
     the tallying; if not NULL, must be a single, non-NA string.}
36
-  \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
37
-    for aggregating the per-read position counts. If \code{NULL}, no per-cycle
38
-    counts are returned.}
39
-  \item{high_base_quality}{The minimum mapping quality for a
40
-    read to be counted as high quality.}
41 34
   \item{minimum_mapq}{Minimum mapping quality for a read to be counted
42 35
     at all.}
43 36
   \item{concordant_only}{Consider only what GSNAP
Browse code

update to latest gstruct; brings faster bam_tally (for high coverage regions) and read-group filtering support in bam_tally

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@80611 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 20/09/2013 02:05:42
Showing 1 changed files
... ...
@@ -14,14 +14,16 @@
14 14
 }
15 15
 
16 16
 \usage{
17
-  BamTallyParam(genome, which = RangesList(), read_pos_breaks = NULL,
17
+  BamTallyParam(genome, which = RangesList(),
18
+                desired_read_group = NULL,
19
+                read_pos_breaks = NULL,
18 20
                 high_base_quality = 0L,
19 21
                 minimum_mapq = 0L,
20 22
                 concordant_only = FALSE, unique_only = FALSE,
21 23
                 primary_only = FALSE, ignore_duplicates = FALSE,
22 24
                 min_depth = 0L, variant_strand = 0L,
23 25
                 ignore_query_Ns = FALSE,
24
-                indels = FALSE)
26
+                indels = TRUE)
25 27
 }
26 28
 \arguments{
27 29
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -29,6 +31,8 @@
29 31
     one that limits the tally to that range or set of ranges. By
30 32
     default, the entire genome is processed.
31 33
   }
34
+  \item{desired_read_group}{The name of the read group to which to limit
35
+    the tallying; if not NULL, must be a single, non-NA string.}
32 36
   \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
33 37
     for aggregating the per-read position counts. If \code{NULL}, no per-read position
34 38
     counts are returned.}
... ...
@@ -54,7 +58,12 @@
54 58
     is a good way to save resources.}
55 59
   \item{ignore_query_Ns}{Whether to ignore the N base pairs when
56 60
     counting. Can save a lot of resources when processing low quality data.}
57
-  \item{indels}{Whether to return indel counts; not supported yet.}
61
+  \item{indels}{Whether to return indel counts. The \code{ref} and
62
+    \code{alt} columns in the returned \code{VRanges} conform to VCF
63
+    conventions; i.e., the first base upstream is included. The range
64
+    always spans the sequence in \code{ref}; so e.g. a deletion extends
65
+    one nt upstream of the actual deleted sequence.
66
+  }
58 67
 }
59 68
 
60 69
 \seealso{
Browse code

Refactor bam_tally, so that bam_tally returns a TallyIIT object, which is then summarized via summarizeVariants; this allows computing tallies once and summarizing them in different ways (like maybe get the coverage). The summarizeVariants function yields a VRanges.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@78427 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/07/2013 00:43:43
Showing 1 changed files
... ...
@@ -14,7 +14,7 @@
14 14
 }
15 15
 
16 16
 \usage{
17
-  BamTallyParam(genome, which = RangesList(), cycle_breaks = NULL,
17
+  BamTallyParam(genome, which = RangesList(), read_pos_breaks = NULL,
18 18
                 high_base_quality = 0L,
19 19
                 minimum_mapq = 0L,
20 20
                 concordant_only = FALSE, unique_only = FALSE,
... ...
@@ -29,8 +29,8 @@
29 29
     one that limits the tally to that range or set of ranges. By
30 30
     default, the entire genome is processed.
31 31
   }
32
-  \item{cycle_breaks}{The breaks, like those passed to \code{\link{cut}}
33
-    for aggregating the per-cycle counts. If \code{NULL}, no per-cycle
32
+  \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
33
+    for aggregating the per-read position counts. If \code{NULL}, no per-read position
34 34
     counts are returned.}
35 35
   \item{high_base_quality}{The minimum mapping quality for a
36 36
     read to be counted as high quality.}
Browse code

Add ignore_duplicate argument to BamTallyParam

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@74874 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 26/03/2013 21:40:15
Showing 1 changed files
... ...
@@ -18,7 +18,7 @@
18 18
                 high_base_quality = 0L,
19 19
                 minimum_mapq = 0L,
20 20
                 concordant_only = FALSE, unique_only = FALSE,
21
-                primary_only = FALSE,
21
+                primary_only = FALSE, ignore_duplicates = FALSE,
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24 24
                 indels = FALSE)
... ...
@@ -41,6 +41,9 @@
41 41
   }
42 42
   \item{unique_only}{Consider only the uniquely mapped reads.}
43 43
   \item{primary_only}{Consider only primary pairs.}
44
+  \item{ignore_duplicates}{Whether to ignore the reads flagged as
45
+    PCR/optical duplicates.
46
+  }
44 47
   \item{min_depth}{The minimum number of reads overlapping a position for
45 48
     it to be counted.}
46 49
   \item{variant_strand}{The number of strands on which a variant must be
Browse code

Make 'genome' a BamTallyParam slot, instead of a bam_tally argument. This is consistent with GsnapParam. Also, rename 'high_quality_cutoff' to 'high_base_quality'.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68551 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 17/08/2012 20:19:50
Showing 1 changed files
... ...
@@ -14,8 +14,8 @@
14 14
 }
15 15
 
16 16
 \usage{
17
-  BamTallyParam(which = RangesList(), cycle_breaks = NULL,
18
-                high_quality_cutoff = 0L,
17
+  BamTallyParam(genome, which = RangesList(), cycle_breaks = NULL,
18
+                high_base_quality = 0L,
19 19
                 minimum_mapq = 0L,
20 20
                 concordant_only = FALSE, unique_only = FALSE,
21 21
                 primary_only = FALSE,
... ...
@@ -24,6 +24,7 @@
24 24
                 indels = FALSE)
25 25
 }
26 26
 \arguments{
27
+  \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
27 28
   \item{which}{A \code{RangesList} or something coercible to
28 29
     one that limits the tally to that range or set of ranges. By
29 30
     default, the entire genome is processed.
... ...
@@ -31,7 +32,7 @@
31 32
   \item{cycle_breaks}{The breaks, like those passed to \code{\link{cut}}
32 33
     for aggregating the per-cycle counts. If \code{NULL}, no per-cycle
33 34
     counts are returned.}
34
-  \item{high_quality_cutoff}{The minimum mapping quality for a
35
+  \item{high_base_quality}{The minimum mapping quality for a
35 36
     read to be counted as high quality.}
36 37
   \item{minimum_mapq}{Minimum mapping quality for a read to be counted
37 38
     at all.}
Browse code

renaming gmapR2 to gmapR: it lives again

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@68172 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 02/08/2012 22:24:24
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,58 @@
1
+\name{BamTallyParam-class}
2
+\Rdversion{1.1}
3
+\docType{class}
4
+\alias{BamTallyParam-class}
5
+\alias{coerce,BamTallyParam,list-method}
6
+\alias{as.list,BamTallyParam-method}
7
+\alias{BamTallyParam}
8
+
9
+\title{Class \code{"BamTallyParam"}}
10
+\description{
11
+  A \code{BamTallyParam} object stores parameters for
12
+  \code{\link{bam_tally}}. The function of the same name serves as its
13
+  constructor. 
14
+}
15
+
16
+\usage{
17
+  BamTallyParam(which = RangesList(), cycle_breaks = NULL,
18
+                high_quality_cutoff = 0L,
19
+                minimum_mapq = 0L,
20
+                concordant_only = FALSE, unique_only = FALSE,
21
+                primary_only = FALSE,
22
+                min_depth = 0L, variant_strand = 0L,
23
+                ignore_query_Ns = FALSE,
24
+                indels = FALSE)
25
+}
26
+\arguments{
27
+  \item{which}{A \code{RangesList} or something coercible to
28
+    one that limits the tally to that range or set of ranges. By
29
+    default, the entire genome is processed.
30
+  }
31
+  \item{cycle_breaks}{The breaks, like those passed to \code{\link{cut}}
32
+    for aggregating the per-cycle counts. If \code{NULL}, no per-cycle
33
+    counts are returned.}
34
+  \item{high_quality_cutoff}{The minimum mapping quality for a
35
+    read to be counted as high quality.}
36
+  \item{minimum_mapq}{Minimum mapping quality for a read to be counted
37
+    at all.}
38
+  \item{concordant_only}{Consider only what GSNAP
39
+    calls \dQuote{concordant} alignments.
40
+  }
41
+  \item{unique_only}{Consider only the uniquely mapped reads.}
42
+  \item{primary_only}{Consider only primary pairs.}
43
+  \item{min_depth}{The minimum number of reads overlapping a position for
44
+    it to be counted.}
45
+  \item{variant_strand}{The number of strands on which a variant must be
46
+    seen for it to be counted. This means that a value of 0 will report
47
+    reference alleles in addition to variants. A value of 1 will report
48
+    only positions where a variant was seen on at least one strand, and
49
+    2 requires the variant be seen on both strands. Setting this to 1
50
+    is a good way to save resources.}
51
+  \item{ignore_query_Ns}{Whether to ignore the N base pairs when
52
+    counting. Can save a lot of resources when processing low quality data.}
53
+  \item{indels}{Whether to return indel counts; not supported yet.}
54
+}
55
+
56
+\seealso{
57
+  \code{\link{bam_tally}}
58
+}