Browse code

fixes for bam_tally update

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@110596 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 13/11/2015 22:40:27
Showing4 changed files

... ...
@@ -20,7 +20,7 @@ setClass("BamTallyParam",
20 20
                         ignore_query_Ns = "logical",
21 21
                         indels = "logical",
22 22
                         include_soft_clips = "integer",
23
-                        exon_iit = "ANY",
23
+                        exon_iit = "characterORNULL",
24 24
                         xs = "logical",
25 25
                         read_pos = "logical",
26 26
                         min_base_quality = "integer",
... ...
@@ -12,11 +12,12 @@ setClass("TallyIIT", representation(ptr = "externalptr",
12 12
                                     bam = "BamFile",
13 13
                                     xs = "logical",
14 14
                                     read_pos = "logical",
15
-                                    nm = "logical"))
15
+                                    nm = "logical",
16
+                                    codon = "logical"))
16 17
 
17
-TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm) {
18
+TallyIIT <- function(ptr, genome, bam, xs, read_pos, nm, codon) {
18 19
   new("TallyIIT", ptr = ptr, genome = genome, bam = bam, xs = xs,
19
-      read_pos = read_pos, nm = nm)
20
+      read_pos = read_pos, nm = nm, codon = codon)
20 21
 }
21 22
 
22 23
 setMethod("genome", "TallyIIT", function(x) x@genome)
... ...
@@ -73,7 +74,8 @@ setMethod("bam_tally", "GmapBamReader",
73 74
             
74 75
             TallyIIT(do.call(.bam_tally_C, c(list(x), param_list)), genome,
75 76
                      as(x, "BamFile"), xs=param_list$xs,
76
-                     read_pos=param_list$read_pos, nm=param_list$nm)
77
+                     read_pos=param_list$read_pos, nm=param_list$nm,
78
+                     codon=!is.null(param_list$exon_iit))
77 79
           })
78 80
 
79 81
 variantSummary <- function(x, read_pos_breaks = NULL,
... ...
@@ -111,7 +113,7 @@ variantSummary <- function(x, read_pos_breaks = NULL,
111 113
                    "count", "count.ref", "count.total",
112 114
                    "count.plus", "count.plus.ref",
113 115
                    "count.minus", "count.minus.ref",
114
-                   "del.count.plus", "del.count.minus",
116
+                   "count.del.plus", "count.del.minus",
115 117
                    "read.pos.mean", "read.pos.mean.ref",
116 118
                    "read.pos.var", "read.pos.var.ref",
117 119
                    "mdfne", "mdfne.ref", "codon.strand",
... ...
@@ -119,8 +121,6 @@ variantSummary <- function(x, read_pos_breaks = NULL,
119 121
                    "count.xs.minus", "count.xs.minus.ref",
120 122
                    "count.high.nm", "count.high.nm.ref")
121 123
 
122
-  tally$codon.strand <- structure(tally$codon.strand, class="factor",
123
-                                  levels=strand())
124 124
   break_names <- character()
125 125
   if (length(read_pos_breaks) > 0L) {
126 126
     read_pos_breaks <- as.integer(read_pos_breaks)
... ...
@@ -129,7 +129,10 @@ variantSummary <- function(x, read_pos_breaks = NULL,
129 129
     tally_names <- c(tally_names, break_names)
130 130
   }
131 131
   names(tally) <- tally_names
132
-  tally <- Filter(Negate(is.null), tally)  
132
+  tally$codon.strand <- if (x@codon) {
133
+      structure(tally$codon.strand, class="factor", levels=levels(strand()))
134
+  }
135
+  tally <- Filter(Negate(is.null), tally)
133 136
 
134 137
   if (!keep_ref_rows) {
135 138
     variant_rows <- !is.na(tally$alt)
... ...
@@ -143,7 +146,8 @@ variantSummary <- function(x, read_pos_breaks = NULL,
143 146
   genome <- genome(x)
144 147
   indel <- nchar(tally$ref) == 0L | nchar(tally$alt) == 0L
145 148
   metacols <- DataFrame(tally[meta_names])
146
-  mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks)
149
+  mcols(metacols) <- variantSummaryColumnDescriptions(read_pos_breaks, x@xs,
150
+                                                      high_nm_score, x@codon)
147 151
 
148 152
   gr <- with(tally,
149 153
              VRanges(seqnames,
... ...
@@ -270,32 +274,37 @@ normArgSingleCharacterOrNULL <- function(x) {
270 274
 ### Column metadata
271 275
 ###
272 276
 
273
-variantSummaryColumnDescriptions <- function(read_pos_breaks) {
277
+variantSummaryColumnDescriptions <-
278
+    function(read_pos_breaks, xs, high_nm, codon)
279
+{
274 280
   desc <- c(
275 281
     n.read.pos = "Number of unique read positions for the ALT",
276 282
     n.read.pos.ref = "Number of unique read positions for the REF",
277
-    raw.count = "Raw ALT count",
278
-    raw.count.ref = "Raw REF count",
279
-    raw.count.total = "Raw total count",
280
-    mean.quality = "Average ALT base quality",
281
-    mean.quality.ref = "Average REF base quality",
282
-    count.plus = "Raw positive strand ALT count",
283
-    count.plus.ref = "Raw positive strand REF count",
284
-    count.minus = "Raw negative strand ALT count",
285
-    count.minus.ref = "Raw negative strand REF count",
283
+    count.plus = "Positive strand ALT count",
284
+    count.plus.ref = "Positive strand REF count",
285
+    count.minus = "Negative strand ALT count",
286
+    count.minus.ref = "Negative strand REF count",
287
+    count.del.plus = "Plus strand deletion count",
288
+    count.del.minus = "Count strand deletion count",
286 289
     read.pos.mean = "Average read position for the ALT",
287 290
     read.pos.mean.ref = "Average read position for the ALT",
288 291
     read.pos.var = "Variance in read position for the ALT",
289 292
     read.pos.var.ref = "Variance in read position for the REF",
290 293
     mdfne = "Median distance from nearest end of read for the ALT",
291 294
     mdfne.ref = "Median distance from nearest end of read for the REF",
292
-    codon.dir = "Direction of transcription for the codon. 0=positive, 1=negative, 2=NA (not a codon)",
293
-    del.count.plus = "plus strand deletion count",
294
-    del.count.minus = "minus strand deletion count",
295
-    count.xs.plus = "Plus strand XS counts",
296
-    count.xs.plus.ref = "Plus strand reference XS counts",
297
-    count.xs.minus = "Minus strand XS counts",
298
-    count.xs.minus.ref = "Minus strand reference XS counts")
295
+      if (codon) {
296
+          c(codon.strand = "Strand of transcription for the codon")
297
+      },
298
+      if (xs) {
299
+          c(count.xs.plus = "Plus strand XS counts",
300
+            count.xs.plus.ref = "Plus strand reference XS counts",
301
+            count.xs.minus = "Minus strand XS counts",
302
+            count.xs.minus.ref = "Minus strand reference XS counts")
303
+      },
304
+      count.high.nm = paste("Count of reads with >=", high_nm,
305
+          "NM score for the ALT"),
306
+      count.high.nm.ref = paste("Count of reads with >=", high_nm,
307
+          "NM score for the REF"))
299 308
   if (length(read_pos_breaks) > 0L) {
300 309
     break_desc <- paste0("Raw ALT count in read position range [",
301 310
                          head(read_pos_breaks, -1), ",",
... ...
@@ -24,7 +24,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
24 24
                 SEXP print_indels_p_R,
25 25
                 SEXP blocksize_R, 
26 26
                 SEXP verbosep_R, SEXP max_softclip_R,
27
-                SEXP genome_iit_file_R,
27
+                SEXP exon_iit_file_R,
28 28
                 SEXP print_xs_scores_p_R, SEXP print_cycles_p_R,
29 29
                 SEXP minimum_quality_score_R, SEXP noncovered_R,
30 30
                 SEXP print_nm_scores_p_R)
... ...
@@ -59,8 +59,8 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R,
59 59
 
60 60
   Genome_T genome = createGenome(genome_dir, db);
61 61
   IIT_T chromosome_iit = readChromosomeIIT(genome_dir, db);
62
-  IIT_T map_iit = genome_iit_file_R == R_NilValue ? NULL :
63
-    IIT_read((char*)CHAR(asChar(genome_iit_file_R)), /*name*/ NULL,
62
+  IIT_T map_iit = exon_iit_file_R == R_NilValue ? NULL :
63
+    IIT_read((char*)CHAR(asChar(exon_iit_file_R)), /*name*/ NULL,
64 64
              /*readonlyp*/true, /*divread*/READ_ALL, /*divstring*/NULL,
65 65
              /*add_iit_p*/false, /*labels_read_p*/true);
66 66
   const char *chr = NULL;
... ...
@@ -16,9 +16,9 @@ enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
16 16
        COUNT_XS_MINUS, COUNT_XS_MINUS_REF,
17 17
        COUNT_HIGH_NM, COUNT_HIGH_NM_REF, N_BASE_COLS };
18 18
 
19
-enum { CODON_PLUS = 1,
20
-       CODON_MINUS,
21
-       NON_CODON };
19
+enum { STRAND_PLUS = 1,
20
+       STRAND_MINUS,
21
+       STRAND_NONE };
22 22
 
23 23
 static char *codon_table[64] = 
24 24
   {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT",
... ...
@@ -287,7 +287,7 @@ parse_indels(unsigned char *bytes, int row,
287 287
     tally->count_ref[row] = tally->count_plus_ref[row] +
288 288
 	tally->count_minus_ref[row];
289 289
     tally->count_total[row] = tally->count_ref[row] + tally->count[row];
290
-    tally->strand[row] = NON_CODON;
290
+    tally->strand[row] = STRAND_NONE;
291 291
     SEXP seq_R = mkChar(read_string(&bytes));
292 292
     if (insertion) {
293 293
       SET_STRING_ELT(tally->read_R, row, seq_R);
... ...
@@ -328,7 +328,7 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
328 328
 {
329 329
   int n_alleles = 0;
330 330
   char allele;
331
-  char stop = strand == 0 ? '\0' : (char) 255;
331
+  char stop = strand == STRAND_NONE ? '\0' : (char) 255;
332 332
   
333 333
   while((allele = (char)read_char(bytes)) != stop) {
334 334
     if (allele == '_') {
... ...
@@ -336,7 +336,7 @@ read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
336 336
         *delcount_minus = read_int(bytes);
337 337
         continue;
338 338
     }
339
-    if(strand == 0)
339
+    if(strand == STRAND_NONE)
340 340
        SET_STRING_ELT(read_R, row, mkCharLen(&allele, 1));
341 341
     else
342 342
        SET_STRING_ELT(read_R, row, mkCharLen(codon_table[(int)allele], 3));
... ...
@@ -504,7 +504,7 @@ static int parse_interval(IIT_T tally_iit, int index,
504 504
     bytes -= 4; /* rewind from read-ahead */
505 505
     if (codon_plus_offset - allele_offset > 0)
506 506
       row += parse_alleles(base + allele_offset, row, ref_row,
507
-                           param, tally, NON_CODON);
507
+                           param, tally, STRAND_NONE);
508 508
     if (deletion_offset - insertion_offset > 0) 
509 509
       row += parse_indels(base + insertion_offset, row,
510 510
                           param, tally, true);
... ...
@@ -514,11 +514,11 @@ static int parse_interval(IIT_T tally_iit, int index,
514 514
 
515 515
     if (codon_minus_offset - codon_plus_offset > 0) {
516 516
       row += parse_alleles(base + codon_plus_offset, row, row,
517
-                           param, tally, CODON_PLUS);
517
+                           param, tally, STRAND_PLUS);
518 518
     }
519 519
     if (next_offset - codon_minus_offset > 0) { 
520 520
       row += parse_alleles(base + codon_minus_offset, row, row,
521
-                           param, tally, CODON_MINUS);
521
+                           param, tally, STRAND_MINUS);
522 522
     }
523 523
     /* fill in position information */
524 524
     for (int r = ref_row; r < row; r++) {