... | ... |
@@ -205,6 +205,13 @@ normArgSingleInteger <- function(x) { |
205 | 205 |
stop("'", name, "' should be a single, non-NA integer") |
206 | 206 |
x |
207 | 207 |
} |
208 |
+normArgSingleNumber <- function(x) { |
|
209 |
+ name <- deparse(substitute(x)) |
|
210 |
+ x <- as.numeric(x) |
|
211 |
+ if (!isSingleNumber(x)) |
|
212 |
+ stop("'", name, "' should be a single, non-NA number") |
|
213 |
+ x |
|
214 |
+} |
|
208 | 215 |
normArgTRUEorFALSE <- function(x) { |
209 | 216 |
name <- deparse(substitute(x)) |
210 | 217 |
if (!isTRUEorFALSE(x)) |
... | ... |
@@ -227,11 +234,11 @@ normArgSingleCharacterOrNULL <- function(x) { |
227 | 234 |
maximum_nhits = 1000000L, |
228 | 235 |
concordant_only = FALSE, unique_only = FALSE, |
229 | 236 |
primary_only = FALSE, ignore_duplicates = FALSE, |
230 |
- min_depth = 0L, variant_strand = 0L, |
|
237 |
+ min_depth = 0L, variant_strand = 0L, variant_pct = 0, |
|
231 | 238 |
ignore_query_Ns = FALSE, |
232 | 239 |
indels = FALSE, |
233 | 240 |
blocksize = 1000L, verbosep = FALSE, |
234 |
- include_soft_clips = 0L, |
|
241 |
+ min_softclip = 0L, max_softclip = 0L, |
|
235 | 242 |
exon_iit = NULL, xs = FALSE, read_pos = FALSE, |
236 | 243 |
min_base_quality = 0L, noncovered = FALSE, nm = FALSE) |
237 | 244 |
{ |
... | ... |
@@ -258,11 +265,13 @@ normArgSingleCharacterOrNULL <- function(x) { |
258 | 265 |
normArgTRUEorFALSE(ignore_duplicates), |
259 | 266 |
normArgSingleInteger(min_depth), |
260 | 267 |
normArgSingleInteger(variant_strand), |
268 |
+ normArgSingleNumber(variant_pct), |
|
261 | 269 |
normArgTRUEorFALSE(ignore_query_Ns), |
262 | 270 |
normArgTRUEorFALSE(indels), |
263 | 271 |
normArgSingleInteger(blocksize), |
264 | 272 |
normArgTRUEorFALSE(verbosep), |
265 |
- normArgSingleInteger(include_soft_clips), |
|
273 |
+ normArgSingleInteger(min_softclip), |
|
274 |
+ normArgSingleInteger(max_softclip), |
|
266 | 275 |
normArgSingleCharacterOrNULL(exon_iit), |
267 | 276 |
normArgTRUEorFALSE(xs), |
268 | 277 |
normArgTRUEorFALSE(read_pos), |
... | ... |
@@ -152,7 +152,8 @@ variantSummary <- function(x, read_pos_breaks = NULL, |
152 | 152 |
gr <- with(tally, |
153 | 153 |
VRanges(seqnames, |
154 | 154 |
IRanges(pos, |
155 |
- width = ifelse(nchar(alt) == 0, nchar(ref), 1L)), |
|
155 |
+ width = ifelse(nchar(alt) == 0 & !is.na(alt), |
|
156 |
+ nchar(ref), 1L)), |
|
156 | 157 |
ref, alt, |
157 | 158 |
count.total, count.ref, count, |
158 | 159 |
seqlengths = seqlengths(genome))) |
... | ... |
@@ -173,7 +174,7 @@ checkTallyConsistency <- function(x) { |
173 | 174 |
} |
174 | 175 |
|
175 | 176 |
normalizeIndelAlleles <- function(x, genome) { |
176 |
- is.indel <- nchar(ref(x)) == 0L | nchar(alt(x)) == 0L |
|
177 |
+ is.indel <- nchar(ref(x)) == 0L | (nchar(alt(x)) == 0L & !is.na(alt(x))) |
|
177 | 178 |
if (any(is.indel)) { |
178 | 179 |
indels <- x[is.indel] |
179 | 180 |
flanks <- flank(indels, 1) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@125153 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -7,13 +7,13 @@ |
7 | 7 |
### Raw tally result |
8 | 8 |
### |
9 | 9 |
|
10 |
-setClass("TallyIIT", representation(ptr = "externalptr", |
|
11 |
- genome = "GmapGenome", |
|
10 |
+setClass("TallyIIT", representation(genome = "GmapGenome", |
|
12 | 11 |
bam = "BamFile", |
13 | 12 |
xs = "logical", |
14 | 13 |
read_pos = "logical", |
15 | 14 |
nm = "logical", |
16 |
- codon = "logical")) |
|
15 |
+ codon = "logical"), |
|
16 |
+ contains = "IIT") |
|
17 | 17 |
|
18 | 18 |
TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm, codon) { |
19 | 19 |
new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs, |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110891 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -110,7 +110,7 @@ variantSummary <- function(x, read_pos_breaks = NULL, |
110 | 110 |
|
111 | 111 |
tally_names <- c("seqnames", "pos", "ref", "alt", |
112 | 112 |
"n.read.pos", "n.read.pos.ref", |
113 |
- "count", "count.ref", "count.total", |
|
113 |
+ "count", "count.ref", "raw.count.total", "count.total", |
|
114 | 114 |
"count.plus", "count.plus.ref", |
115 | 115 |
"count.minus", "count.minus.ref", |
116 | 116 |
"count.del.plus", "count.del.minus", |
... | ... |
@@ -280,6 +280,7 @@ variantSummaryColumnDescriptions <- |
280 | 280 |
desc <- c( |
281 | 281 |
n.read.pos = "Number of unique read positions for the ALT", |
282 | 282 |
n.read.pos.ref = "Number of unique read positions for the REF", |
283 |
+ raw.count.total = "Total depth, before quality filtering", |
|
283 | 284 |
count.plus = "Positive strand ALT count", |
284 | 285 |
count.plus.ref = "Positive strand REF count", |
285 | 286 |
count.minus = "Negative strand ALT count", |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110596 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -12,11 +12,12 @@ setClass("TallyIIT", representation(ptr = "externalptr", |
12 | 12 |
bam = "BamFile", |
13 | 13 |
xs = "logical", |
14 | 14 |
read_pos = "logical", |
15 |
- nm = "logical")) |
|
15 |
+ nm = "logical", |
|
16 |
+ codon = "logical")) |
|
16 | 17 |
|
17 |
-TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm) { |
|
18 |
+TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm, codon) { |
|
18 | 19 |
new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs, |
19 |
- read_pos = read_pos, nm = nm) |
|
20 |
+ read_pos = read_pos, nm = nm, codon = codon) |
|
20 | 21 |
} |
21 | 22 |
|
22 | 23 |
setMethod("genome", "TallyIIT", function(x) x@genome) |
... | ... |
@@ -73,7 +74,8 @@ setMethod("bam_tally", "GmapBamReader", |
73 | 74 |
|
74 | 75 |
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome, |
75 | 76 |
as(x, "BamFile"), xs=param_list$xs, |
76 |
- read_pos=param_list$read_pos, nm=param_list$nm) |
|
77 |
+ read_pos=param_list$read_pos, nm=param_list$nm, |
|
78 |
+ codon=!is.null(param_list$exon_iit)) |
|
77 | 79 |
}) |
78 | 80 |
|
79 | 81 |
variantSummary <- function(x, read_pos_breaks = NULL, |
... | ... |
@@ -111,7 +113,7 @@ variantSummary <- function(x, read_pos_breaks = NULL, |
111 | 113 |
"count", "count.ref", "count.total", |
112 | 114 |
"count.plus", "count.plus.ref", |
113 | 115 |
"count.minus", "count.minus.ref", |
114 |
- "del.count.plus", "del.count.minus", |
|
116 |
+ "count.del.plus", "count.del.minus", |
|
115 | 117 |
"read.pos.mean", "read.pos.mean.ref", |
116 | 118 |
"read.pos.var", "read.pos.var.ref", |
117 | 119 |
"mdfne", "mdfne.ref", "codon.strand", |
... | ... |
@@ -119,8 +121,6 @@ variantSummary <- function(x, read_pos_breaks = NULL, |
119 | 121 |
"count.xs.minus", "count.xs.minus.ref", |
120 | 122 |
"count.high.nm", "count.high.nm.ref") |
121 | 123 |
|
122 |
- tally$codon.strand <- structure(tally$codon.strand, class="factor", |
|
123 |
- levels=strand()) |
|
124 | 124 |
break_names <- character() |
125 | 125 |
if (length(read_pos_breaks) > 0L) { |
126 | 126 |
read_pos_breaks <- as.integer(read_pos_breaks) |
... | ... |
@@ -129,7 +129,10 @@ variantSummary <- function(x, read_pos_breaks = NULL, |
129 | 129 |
tally_names <- c(tally_names, break_names) |
130 | 130 |
} |
131 | 131 |
names(tally) <- tally_names |
132 |
- tally <- Filter(Negate(is.null), tally) |
|
132 |
+ tally$codon.strand <- if (x@codon) { |
|
133 |
+ structure(tally$codon.strand, class="factor", levels=levels(strand())) |
|
134 |
+ } |
|
135 |
+ tally <- Filter(Negate(is.null), tally) |
|
133 | 136 |
|
134 | 137 |
if (!keep_ref_rows) { |
135 | 138 |
variant_rows <- !is.na(tally$alt) |
... | ... |
@@ -143,7 +146,8 @@ variantSummary <- function(x, read_pos_breaks = NULL, |
143 | 146 |
genome <- genome(x) |
144 | 147 |
indel <- nchar(tally$ref) == 0L | nchar(tally$alt) == 0L |
145 | 148 |
metacols <- DataFrame(tally[meta_names]) |
146 |
- mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks) |
|
149 |
+ mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks, x@xs, |
|
150 |
+ high_nm_score, x@codon) |
|
147 | 151 |
|
148 | 152 |
gr <- with(tally, |
149 | 153 |
VRanges(seqnames, |
... | ... |
@@ -270,32 +274,37 @@ normArgSingleCharacterOrNULL <- function(x) { |
270 | 274 |
### Column metadata |
271 | 275 |
### |
272 | 276 |
|
273 |
-variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
|
277 |
+variantSummaryColumnDescriptions <- |
|
278 |
+ function(read_pos_breaks, xs, high_nm, codon) |
|
279 |
+{ |
|
274 | 280 |
desc <- c( |
275 | 281 |
n.read.pos = "Number of unique read positions for the ALT", |
276 | 282 |
n.read.pos.ref = "Number of unique read positions for the REF", |
277 |
- raw.count = "Raw ALT count", |
|
278 |
- raw.count.ref = "Raw REF count", |
|
279 |
- raw.count.total = "Raw total count", |
|
280 |
- mean.quality = "Average ALT base quality", |
|
281 |
- mean.quality.ref = "Average REF base quality", |
|
282 |
- count.plus = "Raw positive strand ALT count", |
|
283 |
- count.plus.ref = "Raw positive strand REF count", |
|
284 |
- count.minus = "Raw negative strand ALT count", |
|
285 |
- count.minus.ref = "Raw negative strand REF count", |
|
283 |
+ count.plus = "Positive strand ALT count", |
|
284 |
+ count.plus.ref = "Positive strand REF count", |
|
285 |
+ count.minus = "Negative strand ALT count", |
|
286 |
+ count.minus.ref = "Negative strand REF count", |
|
287 |
+ count.del.plus = "Plus strand deletion count", |
|
288 |
+ count.del.minus = "Count strand deletion count", |
|
286 | 289 |
read.pos.mean = "Average read position for the ALT", |
287 | 290 |
read.pos.mean.ref = "Average read position for the ALT", |
288 | 291 |
read.pos.var = "Variance in read position for the ALT", |
289 | 292 |
read.pos.var.ref = "Variance in read position for the REF", |
290 | 293 |
mdfne = "Median distance from nearest end of read for the ALT", |
291 | 294 |
mdfne.ref = "Median distance from nearest end of read for the REF", |
292 |
- codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)", |
|
293 |
- del.count.plus = "plus strand deletion count", |
|
294 |
- del.count.minus = "minus strand deletion count", |
|
295 |
- count.xs.plus = "Plus strand XS counts", |
|
296 |
- count.xs.plus.ref = "Plus strand reference XS counts", |
|
297 |
- count.xs.minus = "Minus strand XS counts", |
|
298 |
- count.xs.minus.ref = "Minus strand reference XS counts") |
|
295 |
+ if (codon) { |
|
296 |
+ c(codon.strand = "Strand of transcription for the codon") |
|
297 |
+ }, |
|
298 |
+ if (xs) { |
|
299 |
+ c(count.xs.plus = "Plus strand XS counts", |
|
300 |
+ count.xs.plus.ref = "Plus strand reference XS counts", |
|
301 |
+ count.xs.minus = "Minus strand XS counts", |
|
302 |
+ count.xs.minus.ref = "Minus strand reference XS counts") |
|
303 |
+ }, |
|
304 |
+ count.high.nm = paste("Count of reads with >=", high_nm, |
|
305 |
+ "NM score for the ALT"), |
|
306 |
+ count.high.nm.ref = paste("Count of reads with >=", high_nm, |
|
307 |
+ "NM score for the REF")) |
|
299 | 308 |
if (length(read_pos_breaks) > 0L) { |
300 | 309 |
break_desc <- paste0("Raw ALT count in read position range [", |
301 | 310 |
head(read_pos_breaks, -1), ",", |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110593 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -11,11 +11,12 @@ setClass("TallyIIT", representation(ptr = "externalptr", |
11 | 11 |
genome = "GmapGenome", |
12 | 12 |
bam = "BamFile", |
13 | 13 |
xs = "logical", |
14 |
- read_pos = "logical")) |
|
14 |
+ read_pos = "logical", |
|
15 |
+ nm = "logical")) |
|
15 | 16 |
|
16 |
-TallyIIT <- function(ptr, genome, bam, xs, read_pos) { |
|
17 |
+TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm) { |
|
17 | 18 |
new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs, |
18 |
- read_pos = read_pos) |
|
19 |
+ read_pos = read_pos, nm = nm) |
|
19 | 20 |
} |
20 | 21 |
|
21 | 22 |
setMethod("genome", "TallyIIT", function(x) x@genome) |
... | ... |
@@ -72,16 +73,22 @@ setMethod("bam_tally", "GmapBamReader", |
72 | 73 |
|
73 | 74 |
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome, |
74 | 75 |
as(x, "BamFile"), xs=param_list$xs, |
75 |
- read_pos=param_list$read_pos) |
|
76 |
+ read_pos=param_list$read_pos, nm=param_list$nm) |
|
76 | 77 |
}) |
77 | 78 |
|
78 |
-variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
|
79 |
- keep_ref_rows = FALSE, read_length = NA_integer_) |
|
79 |
+variantSummary <- function(x, read_pos_breaks = NULL, |
|
80 |
+ keep_ref_rows = FALSE, read_length = NA_integer_, |
|
81 |
+ high_nm_score = NA_integer_) |
|
80 | 82 |
{ |
81 | 83 |
read_length <- as.integer(read_length) |
82 | 84 |
if (length(read_length) != 1L) { |
83 | 85 |
stop("'read_length' must be a single integer") |
84 | 86 |
} |
87 |
+ high_nm_score <- as.integer(high_nm_score) |
|
88 |
+ stopifnot(length(high_nm_score) == 1L) |
|
89 |
+ if (!is.na(high_nm_score) && !x@nm) { |
|
90 |
+ stop("'high_nm_score is not NA but NM scores were not tallied") |
|
91 |
+ } |
|
85 | 92 |
if (length(read_pos_breaks) > 0L) { |
86 | 93 |
if (!x@read_pos) { |
87 | 94 |
stop("'read_pos_breaks' non-empty, but read positions were not tallied") |
... | ... |
@@ -97,25 +104,23 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
97 | 104 |
|
98 | 105 |
tally <- .Call(R_tally_iit_parse, x@ptr, |
99 | 106 |
read_pos_breaks, |
100 |
- normArgSingleInteger(high_base_quality), |
|
101 |
- NULL, read_length, x@xs) |
|
107 |
+ NULL, read_length, x@xs, high_nm_score) |
|
102 | 108 |
|
103 | 109 |
tally_names <- c("seqnames", "pos", "ref", "alt", |
104 | 110 |
"n.read.pos", "n.read.pos.ref", |
105 |
- "raw.count", "raw.count.ref", |
|
106 |
- "raw.count.total", |
|
107 |
- "high.quality", "high.quality.ref", |
|
108 |
- "high.quality.total", "mean.quality", |
|
109 |
- "mean.quality.ref", |
|
111 |
+ "count", "count.ref", "count.total", |
|
110 | 112 |
"count.plus", "count.plus.ref", |
111 | 113 |
"count.minus", "count.minus.ref", |
112 | 114 |
"del.count.plus", "del.count.minus", |
113 | 115 |
"read.pos.mean", "read.pos.mean.ref", |
114 | 116 |
"read.pos.var", "read.pos.var.ref", |
115 |
- "mdfne", "mdfne.ref", "codon.dir", |
|
117 |
+ "mdfne", "mdfne.ref", "codon.strand", |
|
116 | 118 |
"count.xs.plus", "count.xs.plus.ref", |
117 |
- "count.xs.minus", "count.xs.minus.ref") |
|
118 |
- |
|
119 |
+ "count.xs.minus", "count.xs.minus.ref", |
|
120 |
+ "count.high.nm", "count.high.nm.ref") |
|
121 |
+ |
|
122 |
+ tally$codon.strand <- structure(tally$codon.strand, class="factor", |
|
123 |
+ levels=strand()) |
|
119 | 124 |
break_names <- character() |
120 | 125 |
if (length(read_pos_breaks) > 0L) { |
121 | 126 |
read_pos_breaks <- as.integer(read_pos_breaks) |
... | ... |
@@ -124,6 +129,7 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
124 | 129 |
tally_names <- c(tally_names, break_names) |
125 | 130 |
} |
126 | 131 |
names(tally) <- tally_names |
132 |
+ tally <- Filter(Negate(is.null), tally) |
|
127 | 133 |
|
128 | 134 |
if (!keep_ref_rows) { |
129 | 135 |
variant_rows <- !is.na(tally$alt) |
... | ... |
@@ -131,23 +137,20 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
131 | 137 |
tally <- lapply(tally, `[`, variant_rows) |
132 | 138 |
} |
133 | 139 |
|
134 |
- meta_names <- setdiff(tally_names, |
|
135 |
- c("seqnames", "pos", "ref", "alt", "high.quality", |
|
136 |
- "high.quality.ref", "high.quality.total")) |
|
140 |
+ meta_names <- setdiff(names(tally), |
|
141 |
+ c("seqnames", "pos", "ref", "alt", "count", |
|
142 |
+ "count.ref", "count.total")) |
|
137 | 143 |
genome <- genome(x) |
138 | 144 |
indel <- nchar(tally$ref) == 0L | nchar(tally$alt) == 0L |
139 | 145 |
metacols <- DataFrame(tally[meta_names]) |
140 | 146 |
mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks) |
141 | 147 |
|
142 |
- samecols = tally[["ref"]] != tally[["alt"]] |
|
143 | 148 |
gr <- with(tally, |
144 | 149 |
VRanges(seqnames, |
145 | 150 |
IRanges(pos, |
146 | 151 |
width = ifelse(nchar(alt) == 0, nchar(ref), 1L)), |
147 | 152 |
ref, alt, |
148 |
- ifelse(indel, raw.count.total, high.quality.total), |
|
149 |
- ifelse(indel, raw.count.ref, high.quality.ref), |
|
150 |
- ifelse(indel, raw.count, high.quality), |
|
153 |
+ count.total, count.ref, count, |
|
151 | 154 |
seqlengths = seqlengths(genome))) |
152 | 155 |
mcols(gr) <- metacols |
153 | 156 |
checkTallyConsistency(gr) |
... | ... |
@@ -160,11 +163,8 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
160 | 163 |
|
161 | 164 |
checkTallyConsistency <- function(x) { |
162 | 165 |
with(mcols(x), { |
163 |
- stopifnot(all(raw.count + raw.count.ref <= raw.count.total, na.rm=TRUE)) |
|
164 |
- stopifnot(all(altDepth(x) <= raw.count, na.rm=TRUE)) |
|
165 |
- stopifnot(all(refDepth(x) <= raw.count.ref, na.rm=TRUE)) |
|
166 |
- stopifnot(all(count.plus + count.minus == raw.count, na.rm=TRUE)) |
|
167 |
- stopifnot(all(count.plus.ref + count.minus.ref == raw.count.ref)) |
|
166 |
+ stopifnot(all(count.plus + count.minus == altDepth(x), na.rm=TRUE)) |
|
167 |
+ stopifnot(all(count.plus.ref + count.minus.ref == refDepth(x))) |
|
168 | 168 |
}) |
169 | 169 |
} |
170 | 170 |
|
... | ... |
@@ -228,7 +228,7 @@ normArgSingleCharacterOrNULL <- function(x) { |
228 | 228 |
blocksize = 1000L, verbosep = FALSE, |
229 | 229 |
include_soft_clips = 0L, |
230 | 230 |
exon_iit = NULL, xs = FALSE, read_pos = FALSE, |
231 |
- min_base_quality = 0L, noncovered = FALSE) |
|
231 |
+ min_base_quality = 0L, noncovered = FALSE, nm = FALSE) |
|
232 | 232 |
{ |
233 | 233 |
if (!is(bamreader, "GmapBamReader")) |
234 | 234 |
stop("'bamreader' must be a GmapBamReader") |
... | ... |
@@ -262,7 +262,8 @@ normArgSingleCharacterOrNULL <- function(x) { |
262 | 262 |
normArgTRUEorFALSE(xs), |
263 | 263 |
normArgTRUEorFALSE(read_pos), |
264 | 264 |
normArgSingleInteger(min_base_quality), |
265 |
- normArgTRUEorFALSE(noncovered)) |
|
265 |
+ normArgTRUEorFALSE(noncovered), |
|
266 |
+ normArgTRUEorFALSE(nm)) |
|
266 | 267 |
} |
267 | 268 |
|
268 | 269 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110225 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -82,9 +82,9 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
82 | 82 |
if (length(read_length) != 1L) { |
83 | 83 |
stop("'read_length' must be a single integer") |
84 | 84 |
} |
85 |
- if (!is.null(read_pos_breaks)) { |
|
85 |
+ if (length(read_pos_breaks) > 0L) { |
|
86 | 86 |
if (!x@read_pos) { |
87 |
- stop("'read_pos_breaks' non-NULL, but read positions were not tallied") |
|
87 |
+ stop("'read_pos_breaks' non-empty, but read positions were not tallied") |
|
88 | 88 |
} |
89 | 89 |
read_pos_breaks <- as.integer(read_pos_breaks) |
90 | 90 |
if (any(is.na(read_pos_breaks))) |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110224 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -71,7 +71,8 @@ setMethod("bam_tally", "GmapBamReader", |
71 | 71 |
param_list$genome <- NULL |
72 | 72 |
|
73 | 73 |
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome, |
74 |
- as(x, "BamFile"), param_list[c("xs", "read_pos")]) |
|
74 |
+ as(x, "BamFile"), xs=param_list$xs, |
|
75 |
+ read_pos=param_list$read_pos) |
|
75 | 76 |
}) |
76 | 77 |
|
77 | 78 |
variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110039 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,10 +9,13 @@ |
9 | 9 |
|
10 | 10 |
setClass("TallyIIT", representation(ptr = "externalptr", |
11 | 11 |
genome = "GmapGenome", |
12 |
- bam = "BamFile")) |
|
12 |
+ bam = "BamFile", |
|
13 |
+ xs = "logical", |
|
14 |
+ read_pos = "logical")) |
|
13 | 15 |
|
14 |
-TallyIIT <- function(ptr, genome, bam) { |
|
15 |
- new("TallyIIT", ptr = ptr, genome = genome, bam = bam) |
|
16 |
+TallyIIT <- function(ptr, genome, bam, xs, read_pos) { |
|
17 |
+ new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs, |
|
18 |
+ read_pos = read_pos) |
|
16 | 19 |
} |
17 | 20 |
|
18 | 21 |
setMethod("genome", "TallyIIT", function(x) x@genome) |
... | ... |
@@ -68,7 +71,7 @@ setMethod("bam_tally", "GmapBamReader", |
68 | 71 |
param_list$genome <- NULL |
69 | 72 |
|
70 | 73 |
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome, |
71 |
- as(x, "BamFile")) |
|
74 |
+ as(x, "BamFile"), param_list[c("xs", "read_pos")]) |
|
72 | 75 |
}) |
73 | 76 |
|
74 | 77 |
variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
... | ... |
@@ -78,10 +81,23 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
78 | 81 |
if (length(read_length) != 1L) { |
79 | 82 |
stop("'read_length' must be a single integer") |
80 | 83 |
} |
84 |
+ if (!is.null(read_pos_breaks)) { |
|
85 |
+ if (!x@read_pos) { |
|
86 |
+ stop("'read_pos_breaks' non-NULL, but read positions were not tallied") |
|
87 |
+ } |
|
88 |
+ read_pos_breaks <- as.integer(read_pos_breaks) |
|
89 |
+ if (any(is.na(read_pos_breaks))) |
|
90 |
+ stop("'read_pos_breaks' should not contain missing values") |
|
91 |
+ if (length(read_pos_breaks) < 2) |
|
92 |
+ stop("'read_pos_breaks' needs at least two elements to define a bin") |
|
93 |
+ if (is.unsorted(read_pos_breaks)) |
|
94 |
+ stop("'read_pos_breaks' must be sorted") |
|
95 |
+ } |
|
96 |
+ |
|
81 | 97 |
tally <- .Call(R_tally_iit_parse, x@ptr, |
82 | 98 |
read_pos_breaks, |
83 | 99 |
normArgSingleInteger(high_base_quality), |
84 |
- NULL, read_length) |
|
100 |
+ NULL, read_length, x@xs) |
|
85 | 101 |
|
86 | 102 |
tally_names <- c("seqnames", "pos", "ref", "alt", |
87 | 103 |
"n.read.pos", "n.read.pos.ref", |
... | ... |
@@ -92,9 +108,13 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
92 | 108 |
"mean.quality.ref", |
93 | 109 |
"count.plus", "count.plus.ref", |
94 | 110 |
"count.minus", "count.minus.ref", |
111 |
+ "del.count.plus", "del.count.minus", |
|
95 | 112 |
"read.pos.mean", "read.pos.mean.ref", |
96 | 113 |
"read.pos.var", "read.pos.var.ref", |
97 |
- "mdfne", "mdfne.ref", "codon.dir") |
|
114 |
+ "mdfne", "mdfne.ref", "codon.dir", |
|
115 |
+ "count.xs.plus", "count.xs.plus.ref", |
|
116 |
+ "count.xs.minus", "count.xs.minus.ref") |
|
117 |
+ |
|
98 | 118 |
break_names <- character() |
99 | 119 |
if (length(read_pos_breaks) > 0L) { |
100 | 120 |
read_pos_breaks <- as.integer(read_pos_breaks) |
... | ... |
@@ -194,7 +214,7 @@ normArgSingleCharacterOrNULL <- function(x) { |
194 | 214 |
} |
195 | 215 |
|
196 | 216 |
.bam_tally_C <- function(bamreader, genome_dir = NULL, db = NULL, |
197 |
- which = NULL, read_pos_breaks = NULL, |
|
217 |
+ which = NULL, |
|
198 | 218 |
high_base_quality = 0L, desired_read_group = NULL, |
199 | 219 |
alloclength = 200000L, |
200 | 220 |
minimum_mapq = 0L, good_unique_mapq = 35L, |
... | ... |
@@ -206,7 +226,8 @@ normArgSingleCharacterOrNULL <- function(x) { |
206 | 226 |
indels = FALSE, |
207 | 227 |
blocksize = 1000L, verbosep = FALSE, |
208 | 228 |
include_soft_clips = 0L, |
209 |
- exon_iit = NULL) |
|
229 |
+ exon_iit = NULL, xs = FALSE, read_pos = FALSE, |
|
230 |
+ min_base_quality = 0L, noncovered = FALSE) |
|
210 | 231 |
{ |
211 | 232 |
if (!is(bamreader, "GmapBamReader")) |
212 | 233 |
stop("'bamreader' must be a GmapBamReader") |
... | ... |
@@ -219,15 +240,6 @@ normArgSingleCharacterOrNULL <- function(x) { |
219 | 240 |
stop("'db' must be NULL or a single, non-NA string") |
220 | 241 |
if (!is.null(desired_read_group) && !isSingleString(desired_read_group)) |
221 | 242 |
stop("'desired_read_group' must be NULL or a single, non-NA string") |
222 |
- if (!is.null(read_pos_breaks)) { |
|
223 |
- read_pos_breaks <- as.integer(read_pos_breaks) |
|
224 |
- if (any(is.na(read_pos_breaks))) |
|
225 |
- stop("'read_pos_breaks' should not contain missing values") |
|
226 |
- if (length(read_pos_breaks) < 2) |
|
227 |
- stop("'read_pos_breaks' needs at least two elements to define a bin") |
|
228 |
- if (is.unsorted(read_pos_breaks)) |
|
229 |
- stop("'read_pos_breaks' must be sorted") |
|
230 |
- } |
|
231 | 243 |
.Call(R_Bamtally_iit, bamreader@.extptr, genome_dir, db, which, |
232 | 244 |
desired_read_group, |
233 | 245 |
normArgSingleInteger(alloclength), |
... | ... |
@@ -245,7 +257,11 @@ normArgSingleCharacterOrNULL <- function(x) { |
245 | 257 |
normArgSingleInteger(blocksize), |
246 | 258 |
normArgTRUEorFALSE(verbosep), |
247 | 259 |
normArgSingleInteger(include_soft_clips), |
248 |
- normArgSingleCharacterOrNULL(exon_iit)) |
|
260 |
+ normArgSingleCharacterOrNULL(exon_iit), |
|
261 |
+ normArgTRUEorFALSE(xs), |
|
262 |
+ normArgTRUEorFALSE(read_pos), |
|
263 |
+ normArgSingleInteger(min_base_quality), |
|
264 |
+ normArgTRUEorFALSE(noncovered)) |
|
249 | 265 |
} |
250 | 266 |
|
251 | 267 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -271,7 +287,13 @@ variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
271 | 287 |
read.pos.var.ref = "Variance in read position for the REF", |
272 | 288 |
mdfne = "Median distance from nearest end of read for the ALT", |
273 | 289 |
mdfne.ref = "Median distance from nearest end of read for the REF", |
274 |
- codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)" ) |
|
290 |
+ codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)", |
|
291 |
+ del.count.plus = "plus strand deletion count", |
|
292 |
+ del.count.minus = "minus strand deletion count", |
|
293 |
+ count.xs.plus = "Plus strand XS counts", |
|
294 |
+ count.xs.plus.ref = "Plus strand reference XS counts", |
|
295 |
+ count.xs.minus = "Minus strand XS counts", |
|
296 |
+ count.xs.minus.ref = "Minus strand reference XS counts") |
|
275 | 297 |
if (length(read_pos_breaks) > 0L) { |
276 | 298 |
break_desc <- paste0("Raw ALT count in read position range [", |
277 | 299 |
head(read_pos_breaks, -1), ",", |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@102487 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,13 +9,10 @@ |
9 | 9 |
|
10 | 10 |
setClass("TallyIIT", representation(ptr = "externalptr", |
11 | 11 |
genome = "GmapGenome", |
12 |
- bam = "BamFile", |
|
13 |
- xs = "logical", |
|
14 |
- read_pos = "logical")) |
|
12 |
+ bam = "BamFile")) |
|
15 | 13 |
|
16 |
-TallyIIT <- function(ptr, genome, bam, xs, read_pos) { |
|
17 |
- new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs, |
|
18 |
- read_pos = read_pos) |
|
14 |
+TallyIIT <- function(ptr, genome, bam) { |
|
15 |
+ new("TallyIIT", ptr = ptr, genome = genome, bam = bam) |
|
19 | 16 |
} |
20 | 17 |
|
21 | 18 |
setMethod("genome", "TallyIIT", function(x) x@genome) |
... | ... |
@@ -71,7 +68,7 @@ setMethod("bam_tally", "GmapBamReader", |
71 | 68 |
param_list$genome <- NULL |
72 | 69 |
|
73 | 70 |
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome, |
74 |
- as(x, "BamFile"), param_list[c("xs", "read_pos")]) |
|
71 |
+ as(x, "BamFile")) |
|
75 | 72 |
}) |
76 | 73 |
|
77 | 74 |
variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
... | ... |
@@ -81,23 +78,10 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
81 | 78 |
if (length(read_length) != 1L) { |
82 | 79 |
stop("'read_length' must be a single integer") |
83 | 80 |
} |
84 |
- if (!is.null(read_pos_breaks)) { |
|
85 |
- if (!x@read_pos) { |
|
86 |
- stop("'read_pos_breaks' non-NULL, but read positions were not tallied") |
|
87 |
- } |
|
88 |
- read_pos_breaks <- as.integer(read_pos_breaks) |
|
89 |
- if (any(is.na(read_pos_breaks))) |
|
90 |
- stop("'read_pos_breaks' should not contain missing values") |
|
91 |
- if (length(read_pos_breaks) < 2) |
|
92 |
- stop("'read_pos_breaks' needs at least two elements to define a bin") |
|
93 |
- if (is.unsorted(read_pos_breaks)) |
|
94 |
- stop("'read_pos_breaks' must be sorted") |
|
95 |
- } |
|
96 |
- |
|
97 | 81 |
tally <- .Call(R_tally_iit_parse, x@ptr, |
98 | 82 |
read_pos_breaks, |
99 | 83 |
normArgSingleInteger(high_base_quality), |
100 |
- NULL, read_length, x@xs) |
|
84 |
+ NULL, read_length) |
|
101 | 85 |
|
102 | 86 |
tally_names <- c("seqnames", "pos", "ref", "alt", |
103 | 87 |
"n.read.pos", "n.read.pos.ref", |
... | ... |
@@ -108,13 +92,9 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
108 | 92 |
"mean.quality.ref", |
109 | 93 |
"count.plus", "count.plus.ref", |
110 | 94 |
"count.minus", "count.minus.ref", |
111 |
- "del.count.plus", "del.count.minus", |
|
112 | 95 |
"read.pos.mean", "read.pos.mean.ref", |
113 | 96 |
"read.pos.var", "read.pos.var.ref", |
114 |
- "mdfne", "mdfne.ref", "codon.dir", |
|
115 |
- "count.xs.plus", "count.xs.plus.ref", |
|
116 |
- "count.xs.minus", "count.xs.minus.ref") |
|
117 |
- |
|
97 |
+ "mdfne", "mdfne.ref", "codon.dir") |
|
118 | 98 |
break_names <- character() |
119 | 99 |
if (length(read_pos_breaks) > 0L) { |
120 | 100 |
read_pos_breaks <- as.integer(read_pos_breaks) |
... | ... |
@@ -214,7 +194,7 @@ normArgSingleCharacterOrNULL <- function(x) { |
214 | 194 |
} |
215 | 195 |
|
216 | 196 |
.bam_tally_C <- function(bamreader, genome_dir = NULL, db = NULL, |
217 |
- which = NULL, |
|
197 |
+ which = NULL, read_pos_breaks = NULL, |
|
218 | 198 |
high_base_quality = 0L, desired_read_group = NULL, |
219 | 199 |
alloclength = 200000L, |
220 | 200 |
minimum_mapq = 0L, good_unique_mapq = 35L, |
... | ... |
@@ -226,8 +206,7 @@ normArgSingleCharacterOrNULL <- function(x) { |
226 | 206 |
indels = FALSE, |
227 | 207 |
blocksize = 1000L, verbosep = FALSE, |
228 | 208 |
include_soft_clips = 0L, |
229 |
- exon_iit = NULL, xs = FALSE, read_pos = FALSE, |
|
230 |
- min_base_quality = 0L, noncovered = FALSE) |
|
209 |
+ exon_iit = NULL) |
|
231 | 210 |
{ |
232 | 211 |
if (!is(bamreader, "GmapBamReader")) |
233 | 212 |
stop("'bamreader' must be a GmapBamReader") |
... | ... |
@@ -240,6 +219,15 @@ normArgSingleCharacterOrNULL <- function(x) { |
240 | 219 |
stop("'db' must be NULL or a single, non-NA string") |
241 | 220 |
if (!is.null(desired_read_group) && !isSingleString(desired_read_group)) |
242 | 221 |
stop("'desired_read_group' must be NULL or a single, non-NA string") |
222 |
+ if (!is.null(read_pos_breaks)) { |
|
223 |
+ read_pos_breaks <- as.integer(read_pos_breaks) |
|
224 |
+ if (any(is.na(read_pos_breaks))) |
|
225 |
+ stop("'read_pos_breaks' should not contain missing values") |
|
226 |
+ if (length(read_pos_breaks) < 2) |
|
227 |
+ stop("'read_pos_breaks' needs at least two elements to define a bin") |
|
228 |
+ if (is.unsorted(read_pos_breaks)) |
|
229 |
+ stop("'read_pos_breaks' must be sorted") |
|
230 |
+ } |
|
243 | 231 |
.Call(R_Bamtally_iit, bamreader@.extptr, genome_dir, db, which, |
244 | 232 |
desired_read_group, |
245 | 233 |
normArgSingleInteger(alloclength), |
... | ... |
@@ -257,11 +245,7 @@ normArgSingleCharacterOrNULL <- function(x) { |
257 | 245 |
normArgSingleInteger(blocksize), |
258 | 246 |
normArgTRUEorFALSE(verbosep), |
259 | 247 |
normArgSingleInteger(include_soft_clips), |
260 |
- normArgSingleCharacterOrNULL(exon_iit), |
|
261 |
- normArgTRUEorFALSE(xs), |
|
262 |
- normArgTRUEorFALSE(read_pos), |
|
263 |
- normArgSingleInteger(min_base_quality), |
|
264 |
- normArgTRUEorFALSE(noncovered)) |
|
248 |
+ normArgSingleCharacterOrNULL(exon_iit)) |
|
265 | 249 |
} |
266 | 250 |
|
267 | 251 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -287,13 +271,7 @@ variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
287 | 271 |
read.pos.var.ref = "Variance in read position for the REF", |
288 | 272 |
mdfne = "Median distance from nearest end of read for the ALT", |
289 | 273 |
mdfne.ref = "Median distance from nearest end of read for the REF", |
290 |
- codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)", |
|
291 |
- del.count.plus = "plus strand deletion count", |
|
292 |
- del.count.minus = "minus strand deletion count", |
|
293 |
- count.xs.plus = "Plus strand XS counts", |
|
294 |
- count.xs.plus.ref = "Plus strand reference XS counts", |
|
295 |
- count.xs.minus = "Minus strand XS counts", |
|
296 |
- count.xs.minus.ref = "Minus strand reference XS counts") |
|
274 |
+ codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)" ) |
|
297 | 275 |
if (length(read_pos_breaks) > 0L) { |
298 | 276 |
break_desc <- paste0("Raw ALT count in read position range [", |
299 | 277 |
head(read_pos_breaks, -1), ",", |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@102485 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,10 +9,13 @@ |
9 | 9 |
|
10 | 10 |
setClass("TallyIIT", representation(ptr = "externalptr", |
11 | 11 |
genome = "GmapGenome", |
12 |
- bam = "BamFile")) |
|
12 |
+ bam = "BamFile", |
|
13 |
+ xs = "logical", |
|
14 |
+ read_pos = "logical")) |
|
13 | 15 |
|
14 |
-TallyIIT <- function(ptr, genome, bam) { |
|
15 |
- new("TallyIIT", ptr = ptr, genome = genome, bam = bam) |
|
16 |
+TallyIIT <- function(ptr, genome, bam, xs, read_pos) { |
|
17 |
+ new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs, |
|
18 |
+ read_pos = read_pos) |
|
16 | 19 |
} |
17 | 20 |
|
18 | 21 |
setMethod("genome", "TallyIIT", function(x) x@genome) |
... | ... |
@@ -68,7 +71,7 @@ setMethod("bam_tally", "GmapBamReader", |
68 | 71 |
param_list$genome <- NULL |
69 | 72 |
|
70 | 73 |
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome, |
71 |
- as(x, "BamFile")) |
|
74 |
+ as(x, "BamFile"), param_list[c("xs", "read_pos")]) |
|
72 | 75 |
}) |
73 | 76 |
|
74 | 77 |
variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
... | ... |
@@ -78,10 +81,23 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
78 | 81 |
if (length(read_length) != 1L) { |
79 | 82 |
stop("'read_length' must be a single integer") |
80 | 83 |
} |
84 |
+ if (!is.null(read_pos_breaks)) { |
|
85 |
+ if (!x@read_pos) { |
|
86 |
+ stop("'read_pos_breaks' non-NULL, but read positions were not tallied") |
|
87 |
+ } |
|
88 |
+ read_pos_breaks <- as.integer(read_pos_breaks) |
|
89 |
+ if (any(is.na(read_pos_breaks))) |
|
90 |
+ stop("'read_pos_breaks' should not contain missing values") |
|
91 |
+ if (length(read_pos_breaks) < 2) |
|
92 |
+ stop("'read_pos_breaks' needs at least two elements to define a bin") |
|
93 |
+ if (is.unsorted(read_pos_breaks)) |
|
94 |
+ stop("'read_pos_breaks' must be sorted") |
|
95 |
+ } |
|
96 |
+ |
|
81 | 97 |
tally <- .Call(R_tally_iit_parse, x@ptr, |
82 | 98 |
read_pos_breaks, |
83 | 99 |
normArgSingleInteger(high_base_quality), |
84 |
- NULL, read_length) |
|
100 |
+ NULL, read_length, x@xs) |
|
85 | 101 |
|
86 | 102 |
tally_names <- c("seqnames", "pos", "ref", "alt", |
87 | 103 |
"n.read.pos", "n.read.pos.ref", |
... | ... |
@@ -92,9 +108,13 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
92 | 108 |
"mean.quality.ref", |
93 | 109 |
"count.plus", "count.plus.ref", |
94 | 110 |
"count.minus", "count.minus.ref", |
111 |
+ "del.count.plus", "del.count.minus", |
|
95 | 112 |
"read.pos.mean", "read.pos.mean.ref", |
96 | 113 |
"read.pos.var", "read.pos.var.ref", |
97 |
- "mdfne", "mdfne.ref", "codon.dir") |
|
114 |
+ "mdfne", "mdfne.ref", "codon.dir", |
|
115 |
+ "count.xs.plus", "count.xs.plus.ref", |
|
116 |
+ "count.xs.minus", "count.xs.minus.ref") |
|
117 |
+ |
|
98 | 118 |
break_names <- character() |
99 | 119 |
if (length(read_pos_breaks) > 0L) { |
100 | 120 |
read_pos_breaks <- as.integer(read_pos_breaks) |
... | ... |
@@ -194,7 +214,7 @@ normArgSingleCharacterOrNULL <- function(x) { |
194 | 214 |
} |
195 | 215 |
|
196 | 216 |
.bam_tally_C <- function(bamreader, genome_dir = NULL, db = NULL, |
197 |
- which = NULL, read_pos_breaks = NULL, |
|
217 |
+ which = NULL, |
|
198 | 218 |
high_base_quality = 0L, desired_read_group = NULL, |
199 | 219 |
alloclength = 200000L, |
200 | 220 |
minimum_mapq = 0L, good_unique_mapq = 35L, |
... | ... |
@@ -206,7 +226,8 @@ normArgSingleCharacterOrNULL <- function(x) { |
206 | 226 |
indels = FALSE, |
207 | 227 |
blocksize = 1000L, verbosep = FALSE, |
208 | 228 |
include_soft_clips = 0L, |
209 |
- exon_iit = NULL) |
|
229 |
+ exon_iit = NULL, xs = FALSE, read_pos = FALSE, |
|
230 |
+ min_base_quality = 0L, noncovered = FALSE) |
|
210 | 231 |
{ |
211 | 232 |
if (!is(bamreader, "GmapBamReader")) |
212 | 233 |
stop("'bamreader' must be a GmapBamReader") |
... | ... |
@@ -219,15 +240,6 @@ normArgSingleCharacterOrNULL <- function(x) { |
219 | 240 |
stop("'db' must be NULL or a single, non-NA string") |
220 | 241 |
if (!is.null(desired_read_group) && !isSingleString(desired_read_group)) |
221 | 242 |
stop("'desired_read_group' must be NULL or a single, non-NA string") |
222 |
- if (!is.null(read_pos_breaks)) { |
|
223 |
- read_pos_breaks <- as.integer(read_pos_breaks) |
|
224 |
- if (any(is.na(read_pos_breaks))) |
|
225 |
- stop("'read_pos_breaks' should not contain missing values") |
|
226 |
- if (length(read_pos_breaks) < 2) |
|
227 |
- stop("'read_pos_breaks' needs at least two elements to define a bin") |
|
228 |
- if (is.unsorted(read_pos_breaks)) |
|
229 |
- stop("'read_pos_breaks' must be sorted") |
|
230 |
- } |
|
231 | 243 |
.Call(R_Bamtally_iit, bamreader@.extptr, genome_dir, db, which, |
232 | 244 |
desired_read_group, |
233 | 245 |
normArgSingleInteger(alloclength), |
... | ... |
@@ -245,7 +257,11 @@ normArgSingleCharacterOrNULL <- function(x) { |
245 | 257 |
normArgSingleInteger(blocksize), |
246 | 258 |
normArgTRUEorFALSE(verbosep), |
247 | 259 |
normArgSingleInteger(include_soft_clips), |
248 |
- normArgSingleCharacterOrNULL(exon_iit)) |
|
260 |
+ normArgSingleCharacterOrNULL(exon_iit), |
|
261 |
+ normArgTRUEorFALSE(xs), |
|
262 |
+ normArgTRUEorFALSE(read_pos), |
|
263 |
+ normArgSingleInteger(min_base_quality), |
|
264 |
+ normArgTRUEorFALSE(noncovered)) |
|
249 | 265 |
} |
250 | 266 |
|
251 | 267 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -271,7 +287,13 @@ variantSummaryColumnDescriptions <- function(read_pos_breaks) { |
271 | 287 |
read.pos.var.ref = "Variance in read position for the REF", |
272 | 288 |
mdfne = "Median distance from nearest end of read for the ALT", |
273 | 289 |
mdfne.ref = "Median distance from nearest end of read for the REF", |
274 |
- codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)" ) |
|
290 |
+ codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)", |
|
291 |
+ del.count.plus = "plus strand deletion count", |
|
292 |
+ del.count.minus = "minus strand deletion count", |
|
293 |
+ count.xs.plus = "Plus strand XS counts", |
|
294 |
+ count.xs.plus.ref = "Plus strand reference XS counts", |
|
295 |
+ count.xs.minus = "Minus strand XS counts", |
|
296 |
+ count.xs.minus.ref = "Minus strand reference XS counts") |
|
275 | 297 |
if (length(read_pos_breaks) > 0L) { |
276 | 298 |
break_desc <- paste0("Raw ALT count in read position range [", |
277 | 299 |
head(read_pos_breaks, -1), ",", |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@93358 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -186,9 +186,9 @@ normArgTRUEorFALSE <- function(x) { |
186 | 186 |
x |
187 | 187 |
} |
188 | 188 |
|
189 |
-normArgSingleCharacter <- function(x) { |
|
189 |
+normArgSingleCharacterOrNULL <- function(x) { |
|
190 | 190 |
name <- deparse(substitute(x)) |
191 |
- if (!is(x, "character") || length(x) != 1) |
|
191 |
+ if (!is.null(x) && (!is(x, "character") || length(x) != 1)) |
|
192 | 192 |
stop("'", name, "' should be a single character value") |
193 | 193 |
x |
194 | 194 |
} |
... | ... |
@@ -206,7 +206,7 @@ normArgSingleCharacter <- function(x) { |
206 | 206 |
indels = FALSE, |
207 | 207 |
blocksize = 1000L, verbosep = FALSE, |
208 | 208 |
include_soft_clips = 0L, |
209 |
- cds_iit) |
|
209 |
+ exon_iit = NULL) |
|
210 | 210 |
{ |
211 | 211 |
if (!is(bamreader, "GmapBamReader")) |
212 | 212 |
stop("'bamreader' must be a GmapBamReader") |
... | ... |
@@ -245,7 +245,7 @@ normArgSingleCharacter <- function(x) { |
245 | 245 |
normArgSingleInteger(blocksize), |
246 | 246 |
normArgTRUEorFALSE(verbosep), |
247 | 247 |
normArgSingleInteger(include_soft_clips), |
248 |
- normArgSingleCharacter(cds_iit)) |
|
248 |
+ normArgSingleCharacterOrNULL(exon_iit)) |
|
249 | 249 |
} |
250 | 250 |
|
251 | 251 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@93316 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,11 +9,10 @@ |
9 | 9 |
|
10 | 10 |
setClass("TallyIIT", representation(ptr = "externalptr", |
11 | 11 |
genome = "GmapGenome", |
12 |
- bam = "BamFile", |
|
13 |
- xs = "logical")) |
|
12 |
+ bam = "BamFile")) |
|
14 | 13 |
|
15 |
-TallyIIT <- function(ptr, genome, bam, xs) { |
|
16 |
- new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs) |
|
14 |
+TallyIIT <- function(ptr, genome, bam) { |
|
15 |
+ new("TallyIIT", ptr = ptr, genome = genome, bam = bam) |
|
17 | 16 |
} |
18 | 17 |
|
19 | 18 |
setMethod("genome", "TallyIIT", function(x) x@genome) |
... | ... |
@@ -67,8 +66,9 @@ setMethod("bam_tally", "GmapBamReader", |
67 | 66 |
} |
68 | 67 |
|
69 | 68 |
param_list$genome <- NULL |
69 |
+ |
|
70 | 70 |
TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome, |
71 |
- as(x, "BamFile"), param_list$count_xs) |
|
71 |
+ as(x, "BamFile")) |
|
72 | 72 |
}) |
73 | 73 |
|
74 | 74 |
variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
... | ... |
@@ -81,12 +81,12 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
81 | 81 |
tally <- .Call(R_tally_iit_parse, x@ptr, |
82 | 82 |
read_pos_breaks, |
83 | 83 |
normArgSingleInteger(high_base_quality), |
84 |
- NULL, read_length, x@xs) |
|
84 |
+ NULL, read_length) |
|
85 | 85 |
|
86 | 86 |
tally_names <- c("seqnames", "pos", "ref", "alt", |
87 | 87 |
"n.read.pos", "n.read.pos.ref", |
88 | 88 |
"raw.count", "raw.count.ref", |
89 |
- "raw.count.total", |
|
89 |
+ "raw.count.total", |
|
90 | 90 |
"high.quality", "high.quality.ref", |
91 | 91 |
"high.quality.total", "mean.quality", |
92 | 92 |
"mean.quality.ref", |
... | ... |
@@ -94,9 +94,7 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
94 | 94 |
"count.minus", "count.minus.ref", |
95 | 95 |
"read.pos.mean", "read.pos.mean.ref", |
96 | 96 |
"read.pos.var", "read.pos.var.ref", |
97 |
- "mdfne", "mdfne.ref", |
|
98 |
- if (x@xs) c("count.xs.plus", "count.xs.plus.ref", |
|
99 |
- "count.xs.minus", "count.xs.minus.ref")) |
|
97 |
+ "mdfne", "mdfne.ref", "codon.dir") |
|
100 | 98 |
break_names <- character() |
101 | 99 |
if (length(read_pos_breaks) > 0L) { |
102 | 100 |
read_pos_breaks <- as.integer(read_pos_breaks) |
... | ... |
@@ -118,8 +116,9 @@ variantSummary <- function(x, read_pos_breaks = NULL, high_base_quality = 0L, |
118 | 116 |
genome <- genome(x) |
119 | 117 |
indel <- nchar(tally$ref) == 0L | nchar(tally$alt) == 0L |
120 | 118 |
metacols <- DataFrame(tally[meta_names]) |
121 |
- mcols(metacols) <- |
|
122 |
- variantSummaryColumnDescriptions(read_pos_breaks)[meta_names,,drop=FALSE] |
|
119 |
+ mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks) |
|
120 |
+ |
|
121 |
+ samecols = tally[["ref"]] != tally[["alt"]] |
|
123 | 122 |
gr <- with(tally, |
124 | 123 |
VRanges(seqnames, |
125 | 124 |
IRanges(pos, |
... | ... |
@@ -187,6 +186,13 @@ normArgTRUEorFALSE <- function(x) { |
187 | 186 |
x |
188 | 187 |
} |
189 | 188 |
|
189 |
+normArgSingleCharacter <- function(x) { |
|
190 |
+ name <- deparse(substitute(x)) |
|
191 |
+ if (!is(x, "character") || length(x) != 1) |
|
192 |
+ stop("'", name, "' should be a single character value") |
|
193 |
+ x |
|
194 |
+} |
|
195 |
+ |
|
190 | 196 |
.bam_tally_C <- function(bamreader, genome_dir = NULL, db = NULL, |
191 | 197 |
which = NULL, read_pos_breaks = NULL, |
192 | 198 |
high_base_quality = 0L, desired_read_group = NULL, |
... | ... |
@@ -200,7 +206,7 @@ normArgTRUEorFALSE <- function(x) { |
200 | 206 |
indels = FALSE, |
201 | 207 |
blocksize = 1000L, verbosep = FALSE, |
202 | 208 |
include_soft_clips = 0L, |
203 |
- count_xs = FALSE, noncovered = FALSE) |
|
209 |
+ cds_iit) |
|
204 | 210 |
{ |
205 | 211 |
if (!is(bamreader, "GmapBamReader")) |
206 | 212 |
stop("'bamreader' must be a GmapBamReader") |
... | ... |
@@ -239,8 +245,7 @@ normArgTRUEorFALSE <- function(x) { |
239 | 245 |
normArgSingleInteger(blocksize), |
240 | 246 |
normArgTRUEorFALSE(verbosep), |
241 | 247 |
normArgSingleInteger(include_soft_clips), |
242 |
- normArgTRUEorFALSE(count_xs), |
|
243 |
- normArgTRUEorFALSE(noncovered)) |
|
248 |
+ normArgSingleCharacter(cds_iit)) |
|