 ... ... @@ -19,9 +19,9 @@ 19 19  minimum_mapq = 0L, 20 20  concordant_only = FALSE, unique_only = FALSE, 21 21  primary_only = FALSE, ignore_duplicates = FALSE, 22 - min_depth = 0L, variant_strand = 0L, 22 + min_depth = 0L, variant_strand = 0L, variant_pct = 0, 23 23  ignore_query_Ns = FALSE, 24 - indels = FALSE, include_soft_clips = 0L, 24 + indels = FALSE, min_softclip = 0L, max_softclip = 0L, 25 25  exon_iit = NULL, IIT_BPPARAM = NULL, 26 26  xs = FALSE, read_pos = FALSE, 27 27  min_base_quality = 0L, noncovered = FALSE, nm = FALSE) ... ... @@ -52,6 +52,8 @@ 52 52  only positions where a variant was seen on at least one strand, and 53 53  2 requires the variant be seen on both strands. Setting this to 1 54 54  is a good way to save resources.} 55 + \item{variant_pct}{The minimum alternate allele fraction 56 + for a variant to be reported for a strand.}  55 57  \item{ignore_query_Ns}{Whether to ignore the N base pairs when 56 58  counting. Can save a lot of resources when processing low quality data.} 57 59  \item{indels}{Whether to return indel counts. The \code{ref} and ... ... @@ -60,7 +62,8 @@ 60 62  always spans the sequence in \code{ref}; so e.g. a deletion extends 61 63  one nt upstream of the actual deleted sequence. 62 64  } 63 - \item{include_soft_clips}{Maximum length of soft clips that are 65 + \item{min_softclip, max_softclip}{ 66 + Minimum and maximum length of soft clips that are 64 67  considered for counting. Soft-clipping is often useful (for GSNAP at 65 68  least) during alignment, and it should be preserved in the 66 69  output. However, soft clipping can preferentially occur in regions

 ... ... @@ -28,7 +28,7 @@ 28 28  } 29 29  \arguments{ 30 30  \item{genome}{A \code{GmapGenome} object, or something coercible to one.} 31 - \item{which}{A \code{RangesList} or something coercible to 31 + \item{which}{A \code{IntegerRangesList} or something coercible to 32 32  one that limits the tally to that range or set of ranges. By 33 33  default, the entire genome is processed. 34 34  }

 ... ... @@ -22,7 +22,9 @@ 22 22  min_depth = 0L, variant_strand = 0L, 23 23  ignore_query_Ns = FALSE, 24 24  indels = FALSE, include_soft_clips = 0L, 25 - exon_iit = NULL, IIT_BPPARAM = NULL) 25 + exon_iit = NULL, IIT_BPPARAM = NULL, 26 + xs = FALSE, read_pos = FALSE, 27 + min_base_quality = 0L, noncovered = FALSE, nm = FALSE) 26 28  } 27 29  \arguments{ 28 30  \item{genome}{A \code{GmapGenome} object, or something coercible to one.} ... ... @@ -74,6 +76,20 @@ 74 76  generating the iit file from an R object. Ignored if \code{exon_iit} 75 77  is a character vector or \code{NULL} 76 78  } 79 + \item{xs}{Whether to tabulate reads by XS tag, the aligner's best 80 + guess about the strand of transcription. 81 + } 82 + \item{read_pos}{Whether to tabulate by read position. 83 + } 84 + \item{min_base_quality}{Minimum base quality cutoff. Calls of lower 85 + quality are not counted, except in the total raw depth. 86 + } 87 + \item{noncovered}{ 88 + Whether to report zero tallies, where there is no coverage. 89 + } 90 + \item{nm}{ 91 + Whether to tally by NM tag, the number of mismatches for a read. 92 + } 77 93  } 78 94  \seealso{ 79 95  \code{\link{bam_tally}}

 ... ... @@ -22,7 +22,7 @@ 22 22  min_depth = 0L, variant_strand = 0L, 23 23  ignore_query_Ns = FALSE, 24 24  indels = FALSE, include_soft_clips = 0L, 25 - cds_iit = NULL, IIT_BPPARAM = NULL) 25 + exon_iit = NULL, IIT_BPPARAM = NULL) 26 26  } 27 27  \arguments{ 28 28  \item{genome}{A \code{GmapGenome} object, or something coercible to one.}

 ... ... @@ -22,7 +22,7 @@ 22 22  min_depth = 0L, variant_strand = 0L, 23 23  ignore_query_Ns = FALSE, 24 24  indels = FALSE, include_soft_clips = 0L, 25 - count_xs = FALSE, noncovered = FALSE) 25 + cds_iit = NULL, IIT_BPPARAM = NULL) 26 26  } 27 27  \arguments{ 28 28  \item{genome}{A \code{GmapGenome} object, or something coercible to one.} ... ... @@ -65,15 +65,16 @@ 65 65  of discordance with the reference, and if those clipped regions are 66 66  ignored during counting, the allele fraction is misestimated. 67 67  } 68 - \item{count_xs}{Whether to tabulate the +/- values of the XS tag, 69 - often output by RNA-seq aligners to indicate the strand of 70 - transcription. 71 - } 72 - \item{noncovered}{Whether to output rows for positions without any 73 - coverage. 68 + \item{exon_iit}{An object which indicates the exons to be used for 69 + tallying codons (a character value indicating an existing .iit file, a 70 + \code{GRangesList} of exons by gene or a \code{TxDb} object from which 71 + to make such a \code{GRangesList}) or \code{NULL} indicating no 72 + codon-level tallying should be done.} 73 + \item{IIT_BPPARAM}{A \code{BiocParallelParam} object to use when 74 + generating the iit file from an R object. Ignored if \code{exon_iit} 75 + is a character vector or \code{NULL} 74 76  } 75 77  } 76 - 77 78  \seealso{ 78 79  \code{\link{bam_tally}} 79 80  }

 ... ... @@ -21,7 +21,8 @@ 21 21  primary_only = FALSE, ignore_duplicates = FALSE, 22 22  min_depth = 0L, variant_strand = 0L, 23 23  ignore_query_Ns = FALSE, 24 - indels = FALSE, include_soft_clips = 0L) 24 + indels = FALSE, include_soft_clips = 0L, 25 + count_xs = FALSE, noncovered = FALSE) 25 26  } 26 27  \arguments{ 27 28  \item{genome}{A \code{GmapGenome} object, or something coercible to one.} ... ... @@ -64,6 +65,13 @@ 64 65  of discordance with the reference, and if those clipped regions are 65 66  ignored during counting, the allele fraction is misestimated. 66 67  } 68 + \item{count_xs}{Whether to tabulate the +/- values of the XS tag, 69 + often output by RNA-seq aligners to indicate the strand of 70 + transcription. 71 + } 72 + \item{noncovered}{Whether to output rows for positions without any 73 + coverage. 74 + } 67 75  } 68 76   69 77  \seealso{

 ... ... @@ -21,7 +21,7 @@ 21 21  primary_only = FALSE, ignore_duplicates = FALSE, 22 22  min_depth = 0L, variant_strand = 0L, 23 23  ignore_query_Ns = FALSE, 24 - indels = FALSE) 24 + indels = FALSE, include_soft_clips = 0L) 25 25  } 26 26  \arguments{ 27 27  \item{genome}{A \code{GmapGenome} object, or something coercible to one.} ... ... @@ -57,6 +57,13 @@ 57 57  always spans the sequence in \code{ref}; so e.g. a deletion extends 58 58  one nt upstream of the actual deleted sequence. 59 59  } 60 + \item{include_soft_clips}{Maximum length of soft clips that are 61 + considered for counting. Soft-clipping is often useful (for GSNAP at 62 + least) during alignment, and it should be preserved in the 63 + output. However, soft clipping can preferentially occur in regions 64 + of discordance with the reference, and if those clipped regions are 65 + ignored during counting, the allele fraction is misestimated. 66 + } 60 67  } 61 68   62 69  \seealso{

 ... ... @@ -14,16 +14,14 @@ 14 14  } 15 15   16 16  \usage{ 17 - BamTallyParam(genome, which = RangesList(), 17 + BamTallyParam(genome, which = GRanges(), 18 18  desired_read_group = NULL, 19 - read_pos_breaks = NULL, 20 - high_base_quality = 0L, 21 19  minimum_mapq = 0L, 22 20  concordant_only = FALSE, unique_only = FALSE, 23 21  primary_only = FALSE, ignore_duplicates = FALSE, 24 22  min_depth = 0L, variant_strand = 0L, 25 23  ignore_query_Ns = FALSE, 26 - indels = TRUE) 24 + indels = FALSE) 27 25  } 28 26  \arguments{ 29 27  \item{genome}{A \code{GmapGenome} object, or something coercible to one.} ... ... @@ -33,11 +31,6 @@ 33 31  } 34 32  \item{desired_read_group}{The name of the read group to which to limit 35 33  the tallying; if not NULL, must be a single, non-NA string.} 36 - \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}} 37 - for aggregating the per-read position counts. If \code{NULL}, no per-cycle 38 - counts are returned.} 39 - \item{high_base_quality}{The minimum mapping quality for a 40 - read to be counted as high quality.} 41 34  \item{minimum_mapq}{Minimum mapping quality for a read to be counted 42 35  at all.} 43 36  \item{concordant_only}{Consider only what GSNAP

 ... ... @@ -14,14 +14,16 @@ 14 14  } 15 15   16 16  \usage{ 17 - BamTallyParam(genome, which = RangesList(), read_pos_breaks = NULL, 17 + BamTallyParam(genome, which = RangesList(), 18 + desired_read_group = NULL, 19 + read_pos_breaks = NULL, 18 20  high_base_quality = 0L, 19 21  minimum_mapq = 0L, 20 22  concordant_only = FALSE, unique_only = FALSE, 21 23  primary_only = FALSE, ignore_duplicates = FALSE, 22 24  min_depth = 0L, variant_strand = 0L, 23 25  ignore_query_Ns = FALSE, 24 - indels = FALSE) 26 + indels = TRUE) 25 27  } 26 28  \arguments{ 27 29  \item{genome}{A \code{GmapGenome} object, or something coercible to one.} ... ... @@ -29,6 +31,8 @@ 29 31  one that limits the tally to that range or set of ranges. By 30 32  default, the entire genome is processed. 31 33  } 34 + \item{desired_read_group}{The name of the read group to which to limit 35 + the tallying; if not NULL, must be a single, non-NA string.} 32 36  \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}} 33 37  for aggregating the per-read position counts. If \code{NULL}, no per-cycle 34 38  counts are returned.} ... ... @@ -54,7 +58,12 @@ 54 58  is a good way to save resources.} 55 59  \item{ignore_query_Ns}{Whether to ignore the N base pairs when 56 60  counting. Can save a lot of resources when processing low quality data.} 57 - \item{indels}{Whether to return indel counts; not supported yet.} 61 + \item{indels}{Whether to return indel counts. The \code{ref} and 62 + \code{alt} columns in the returned \code{VRanges} conform to VCF 63 + conventions; i.e., the first base upstream is included. The range 64 + always spans the sequence in \code{ref}; so e.g. a deletion extends 65 + one nt upstream of the actual deleted sequence. 66 + } 58 67  } 59 68   60 69  \seealso{

 ... ... @@ -14,7 +14,7 @@ 14 14  } 15 15   16 16  \usage{ 17 - BamTallyParam(genome, which = RangesList(), cycle_breaks = NULL, 17 + BamTallyParam(genome, which = RangesList(), read_pos_breaks = NULL, 18 18  high_base_quality = 0L, 19 19  minimum_mapq = 0L, 20 20  concordant_only = FALSE, unique_only = FALSE, ... ... @@ -29,8 +29,8 @@ 29 29  one that limits the tally to that range or set of ranges. By 30 30  default, the entire genome is processed. 31 31  } 32 - \item{cycle_breaks}{The breaks, like those passed to \code{\link{cut}} 33 - for aggregating the per-cycle counts. If \code{NULL}, no per-cycle 32 + \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}} 33 + for aggregating the per-read position counts. If \code{NULL}, no per-read position 34 34  counts are returned.} 35 35  \item{high_base_quality}{The minimum mapping quality for a 36 36  read to be counted as high quality.}

 ... ... @@ -18,7 +18,7 @@ 18 18  high_base_quality = 0L, 19 19  minimum_mapq = 0L, 20 20  concordant_only = FALSE, unique_only = FALSE, 21 - primary_only = FALSE, 21 + primary_only = FALSE, ignore_duplicates = FALSE, 22 22  min_depth = 0L, variant_strand = 0L, 23 23  ignore_query_Ns = FALSE, 24 24  indels = FALSE) ... ... @@ -41,6 +41,9 @@ 41 41  } 42 42  \item{unique_only}{Consider only the uniquely mapped reads.} 43 43  \item{primary_only}{Consider only primary pairs.} 44 + \item{ignore_duplicates}{Whether to ignore the reads flagged as 45 + PCR/optical duplicates. 46 + } 44 47  \item{min_depth}{The minimum number of reads overlapping a position for 45 48  it to be counted.} 46 49  \item{variant_strand}{The number of strands on which a variant must be

 ... ... @@ -14,8 +14,8 @@ 14 14  } 15 15   16 16  \usage{ 17 - BamTallyParam(which = RangesList(), cycle_breaks = NULL, 18 - high_quality_cutoff = 0L, 17 + BamTallyParam(genome, which = RangesList(), cycle_breaks = NULL, 18 + high_base_quality = 0L, 19 19  minimum_mapq = 0L, 20 20  concordant_only = FALSE, unique_only = FALSE, 21 21  primary_only = FALSE, ... ... @@ -24,6 +24,7 @@ 24 24  indels = FALSE) 25 25  } 26 26  \arguments{ 27 + \item{genome}{A \code{GmapGenome} object, or something coercible to one.} 27 28  \item{which}{A \code{RangesList} or something coercible to 28 29  one that limits the tally to that range or set of ranges. By 29 30  default, the entire genome is processed. ... ... @@ -31,7 +32,7 @@ 31 32  \item{cycle_breaks}{The breaks, like those passed to \code{\link{cut}} 32 33  for aggregating the per-cycle counts. If \code{NULL}, no per-cycle 33 34  counts are returned.} 34 - \item{high_quality_cutoff}{The minimum mapping quality for a 35 + \item{high_base_quality}{The minimum mapping quality for a 35 36  read to be counted as high quality.} 36 37  \item{minimum_mapq}{Minimum mapping quality for a read to be counted 37 38  at all.}
 1 1 new file mode 100644 ... ... @@ -0,0 +1,58 @@ 1 +\name{BamTallyParam-class} 2 +\Rdversion{1.1} 3 +\docType{class} 4 +\alias{BamTallyParam-class} 5 +\alias{coerce,BamTallyParam,list-method} 6 +\alias{as.list,BamTallyParam-method} 7 +\alias{BamTallyParam} 8 + 9 +\title{Class \code{"BamTallyParam"}} 10 +\description{ 11 + A \code{BamTallyParam} object stores parameters for 12 + \code{\link{bam_tally}}. The function of the same name serves as its 13 + constructor.  14 +} 15 + 16 +\usage{ 17 + BamTallyParam(which = RangesList(), cycle_breaks = NULL, 18 + high_quality_cutoff = 0L, 19 + minimum_mapq = 0L, 20 + concordant_only = FALSE, unique_only = FALSE, 21 + primary_only = FALSE, 22 + min_depth = 0L, variant_strand = 0L, 23 + ignore_query_Ns = FALSE, 24 + indels = FALSE) 25 +} 26 +\arguments{ 27 + \item{which}{A \code{RangesList} or something coercible to 28 + one that limits the tally to that range or set of ranges. By 29 + default, the entire genome is processed. 30 + } 31 + \item{cycle_breaks}{The breaks, like those passed to \code{\link{cut}} 32 + for aggregating the per-cycle counts. If \code{NULL}, no per-cycle 33 + counts are returned.} 34 + \item{high_quality_cutoff}{The minimum mapping quality for a 35 + read to be counted as high quality.} 36 + \item{minimum_mapq}{Minimum mapping quality for a read to be counted 37 + at all.} 38 + \item{concordant_only}{Consider only what GSNAP 39 + calls \dQuote{concordant} alignments. 40 + } 41 + \item{unique_only}{Consider only the uniquely mapped reads.} 42 + \item{primary_only}{Consider only primary pairs.} 43 + \item{min_depth}{The minimum number of reads overlapping a position for 44 + it to be counted.} 45 + \item{variant_strand}{The number of strands on which a variant must be 46 + seen for it to be counted. This means that a value of 0 will report 47 + reference alleles in addition to variants. 
A value of 1 will report 48 + only positions where a variant was seen on at least one strand, and 49 + 2 requires the variant be seen on both strands. Setting this to 1 50 + is a good way to save resources.} 51 + \item{ignore_query_Ns}{Whether to ignore the N base pairs when 52 + counting. Can save a lot of resources when processing low quality data.} 53 + \item{indels}{Whether to return indel counts; not supported yet.} 54 +} 55 + 56 +\seealso{ 57 + \code{\link{bam_tally}} 58 +}