git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@82008 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -10,13 +10,13 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
10 | 10 |
to work with GMAP and GSNAP from within R. In addition, it provides |
11 | 11 |
methods to tally alignment results on a per-nucleotide basis using |
12 | 12 |
the bam_tally tool. |
13 |
-Version: 1.5.0 |
|
13 |
+Version: 1.5.1 |
|
14 | 14 |
Depends: R (>= 2.15.0), methods, GenomicRanges |
15 |
-Imports: IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15), GenomicRanges, |
|
16 |
- GenomicFeatures, Biostrings, VariantAnnotation (>= 1.7.34), tools, |
|
17 |
- Biobase, BSgenome, methods |
|
15 |
+Imports: IRanges, Rsamtools (>= 1.7.4), rtracklayer (>= 1.17.15), |
|
16 |
+ GenomicFeatures, Biostrings, VariantAnnotation (>= 1.9.4), tools, |
|
17 |
+ Biobase, BSgenome |
|
18 | 18 |
Suggests: RUnit, BSgenome.Dmelanogaster.UCSC.dm3, |
19 |
- BSgenome.Scerevisiae.UCSC.sacCer3, VariantAnnotation, |
|
19 |
+ BSgenome.Scerevisiae.UCSC.sacCer3, |
|
20 | 20 |
org.Hs.eg.db, TxDb.Hsapiens.UCSC.hg19.knownGene, |
21 | 21 |
BSgenome.Hsapiens.UCSC.hg19, LungCancerLines |
22 | 22 |
License: Artistic-2.0 |
... | ... |
@@ -1,3 +1,44 @@ |
1 |
+CHANGES IN VERSION 1.4.0 |
|
2 |
+----------------------- |
|
3 |
+ |
|
4 |
+NEW FEATURES |
|
5 |
+ |
|
6 |
+ o Add desired_read_group to BamTallyParam; will limit tallies to |
|
7 |
+ that specific read group (useful for multi-amplicon sequencing, |
|
8 |
+ like Fluidigm) |
|
9 |
+ |
|
10 |
+ o Add keep_ref_rows argument to variantSummary() for keeping rows |
|
11 |
+ for positions where no alt is detected (the rows where ref == alt). |
|
12 |
+ |
|
13 |
+ o gsnap() will now output a GsnapOutputList when there are |
|
14 |
+ multiple input files |
|
15 |
+ |
|
16 |
+ o Support 'terminal_threshold' and 'gmap_mode' parameters in |
|
17 |
+ GsnapParam, and use different defaults for DNA vs. RNA. This |
|
18 |
+ means a big improvement in alignment quality for DNA. |
|
19 |
+ |
|
20 |
+ o GmapGenome now accepts a direct path to the genome as its first argument |
|
21 |
+ |
|
22 |
+USER-VISIBLE CHANGES |
|
23 |
+ |
|
24 |
+ o Renamed summarizeVariants to variantSummary |
|
25 |
+ |
|
26 |
+ o The 'which' in GsnapParam is now a GenomicRanges instead of a RangesList |
|
27 |
+ |
|
28 |
+ o Refactor bam_tally, so that bam_tally returns a TallyIIT object, |
|
29 |
+ which is then summarized via summarizeVariants; this allows computing |
|
30 |
+ tallies once and summarizing them in different ways (like maybe get |
|
31 |
+ the coverage). The summarizeVariants function yields a VRanges. |
|
32 |
+ |
|
33 |
+BUG FIXES |
|
34 |
+ |
|
35 |
+ o fix minimum quality cutoff check to >=, instead of > |
|
36 |
+ |
|
37 |
+ o fix asBam,GsnapOutput for when unique_only is TRUE |
|
38 |
+ |
|
39 |
+ o package created by makeGmapGenomePackage now have a GmapGenome with |
|
40 |
+ the correct name |
|
41 |
+ |
|
1 | 42 |
CHANGES IN VERSION 1.2.0 |
2 | 43 |
----------------------- |
3 | 44 |
|
... | ... |
@@ -82,6 +82,7 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
82 | 82 |
"count.neg", "count.neg.ref", |
83 | 83 |
"read.pos.mean", "read.pos.mean.ref", |
84 | 84 |
"read.pos.var", "read.pos.var.ref") |
85 |
+ break_names <- character() |
|
85 | 86 |
if (length(read_pos_breaks) > 0L) { |
86 | 87 |
read_pos_breaks <- as.integer(read_pos_breaks) |
87 | 88 |
break_names <- paste("readPosCount", head(read_pos_breaks, -1), |
... | ... |
@@ -101,6 +102,8 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
101 | 102 |
"high.quality.ref", "high.quality.total")) |
102 | 103 |
genome <- genome(x) |
103 | 104 |
indel <- nchar(tally$ref) == 0L | nchar(tally$alt) == 0L |
105 |
+ metacols <- DataFrame(tally[meta_names]) |
|
106 |
+ mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks) |
|
104 | 107 |
gr <- with(tally, |
105 | 108 |
VRanges(seqnames, |
106 | 109 |
IRanges(pos, |
... | ... |
@@ -109,8 +112,8 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
109 | 112 |
ifelse(indel, raw.count.total, high.quality.total), |
110 | 113 |
ifelse(indel, raw.count.ref, high.quality.ref), |
111 | 114 |
ifelse(indel, raw.count, high.quality), |
112 |
- DataFrame(tally[meta_names]), |
|
113 | 115 |
seqlengths = seqlengths(genome))) |
116 |
+ mcols(gr) <- metacols |
|
114 | 117 |
seqinfo(gr) <- seqinfo(genome) |
115 | 118 |
gr <- normalizeIndelAlleles(gr, genome) |
116 | 119 |
gr |
... | ... |
@@ -197,3 +200,33 @@ normArgTRUEorFALSE <- function(x) { |
197 | 200 |
normArgSingleInteger(blocksize), |
198 | 201 |
normArgTRUEorFALSE(verbosep)) |
199 | 202 |
} |
203 |
+ |
|
204 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
205 |
+### Column metadata |
|
206 |
+### |
|
207 |
+ |
|
208 |
+variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
|
209 |
+ desc <- c( |
|
210 |
+ n.read.pos = "Number of unique read positions for the ALT", |
|
211 |
+ n.read.pos.ref = "Number of unique read positions for the REF", |
|
212 |
+ raw.count = "Raw ALT count", |
|
213 |
+ raw.count.ref = "Raw REF count", |
|
214 |
+ raw.count.total = "Raw total count", |
|
215 |
+ mean.quality = "Average ALT base quality", |
|
216 |
+ mean.quality.ref = "Average REF base quality", |
|
217 |
+ count.pos = "Raw positive strand ALT count", |
|
218 |
+ count.pos.ref = "Raw positive strand REF count", |
|
219 |
+ count.neg = "Raw negative strand ALT count", |
|
220 |
+ count.neg.ref = "Raw negative strand REF count", |
|
221 |
+ read.pos.mean = "Average read position for the ALT", |
|
222 |
+ read.pos.mean.ref = "Average read position for the ALT", |
|
223 |
+ read.pos.var = "Variance in read position for the ALT", |
|
224 |
+ read.pos.var.ref = "Variance in read position for the REF") |
|
225 |
+ if (length(read_pos_breaks) > 0L) { |
|
226 |
+ break_desc <- paste0("Raw ALT count in read position range [", |
|
227 |
+ head(read_pos_breaks, -1), ",", |
|
228 |
+ tail(read_pos_breaks, -1), ")") |
|
229 |
+ desc <- c(desc, break_desc) |
|
230 |
+ } |
|
231 |
+ DataFrame(Description = desc) |
|
232 |
+} |