Browse code

bplapply instead of srapply

- deprecate srapply


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@83156 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 11/11/2013 15:01:45
Showing1 changed files
... ...
@@ -430,7 +430,7 @@ setMethod(qa2, "QACollate",
430 430
 {
431 431
     if (verbose) message("qa2,QACollate-method")
432 432
 
433
-    qas <- srapply(seq_along(object@src@con), function(i, object, ...) {
433
+    qas <- bplapply(seq_along(object@src@con), function(i, object, ...) {
434 434
         object@src@con <- object@src@con[i]
435 435
         .qa2_do_collate1(object, ...)
436 436
     }, object, ..., verbose=verbose)
Browse code

various code updates

- NAMESPACE imports
- avoid RColorBrewer dependence
- auto-open gzfile, avoiding 'error' about illegal seek
- fully resolve functions for discovering formals()


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@71329 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 19/11/2012 15:04:07
Showing1 changed files
... ...
@@ -552,11 +552,12 @@ setMethod(report, "QASource",
552 552
 {
553 553
     df <- as(values(x), "data.frame")
554 554
     df$Id <- as.integer(as.character(df$Id))
555
+    pal <- c("#D73027", "#4575B4") # brewer.pal(9, "RdYlBu")[c(1, 9)]
555 556
     plt <-
556 557
         dotplot(Id ~ SourceN, df,
557 558
                 type = "b", pch = 20, col = .dnaCol,
558 559
                 rng = x@flagNSequencesRange,
559
-                rngcol = brewer.pal(9, "RdYlBu")[c(1, 9)],
560
+                rngcol = pal,
560 561
                 panel=function(x, y, ..., rng, rngcol) {
561 562
                     panel.dotplot(x, y, ...)
562 563
                     yy <- c(min(y), max(y))
... ...
@@ -618,7 +619,11 @@ setMethod(report, "QAQualityUse",
618 619
                             lapply(split(Count, Id), cumsum),
619 620
                             lapply(split(Count, Id), sum)),
620 621
                         use.names=FALSE))
621
-    col <- colorRampPalette(brewer.pal(9, "RdYlBu"))(length(levels(q)))
622
+
623
+    pal <-       # brewer.pal(9, "RdYlBu")
624
+        c("#D73027", "#F46D43", "#FDAE61", "#FEE090", "#FFFFBF",
625
+           "#E0F3F8", "#ABD9E9", "#74ADD1", "#4575B4")
626
+    col <- colorRampPalette(pal)(length(levels(q)))
622 627
     plt <- dotplot(Id ~ Proportion, group=Quality, df,
623 628
                    type = "b", pch = 20, col = col,
624 629
                    xlab="Cummulative Proportion",
... ...
@@ -638,8 +643,11 @@ setMethod(report, "QAReadQuality",
638 643
     df$Id <- factor(df$Id, levels=c(lvl[!lvl %in% flag], flag))
639 644
     xmin <- min(df$Score)
640 645
     ymax <- max(df$Density)
646
+    pal <-       # brewer.pal(8, "Set1")
647
+        c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",
648
+          "#FFFF33", "#A65628", "#F781BF")
641 649
     col <- c(rep("gray", length(lvl) - length(flag)),
642
-             brewer.pal(8, "Set1")[1 + (seq_along(flag) - 1) %% 8])
650
+             pal[1 + (seq_along(flag) - 1) %% 8])
643 651
     plt <-
644 652
         xyplot(Density ~ Score, group=Id, df,
645 653
                type = "l", xlab = "Average (calibrated) base quality", 
Browse code

documentation update, part 1

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@64828 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 03/04/2012 11:45:19
Showing1 changed files
... ...
@@ -23,6 +23,12 @@ setMethod(.filter, "QAData", function(object, useFilter, ...) {
23 23
 
24 24
 ## QASummary
25 25
 
26
+.show_KoverA <-
27
+    function(object, K=object@flagK, A=object@flagA)
28
+{
29
+    cat(sprintf("flag: K over A = (%.2f x 100)%% over %d\n", K, A))
30
+}
31
+
26 32
 .QASummary <- 
27 33
     function (class, useFilter = TRUE, addFilter = TRUE, ..., html) 
28 34
 {
... ...
@@ -76,8 +82,7 @@ QAReadQuality <-
76 82
 
77 83
 setMethod(show, "QAReadQuality", function(object) {
78 84
     callNextMethod()
79
-    cat(sprintf("flag: K over A = (%.2f x 100)%% over %d\n",
80
-                object@flagK, object@flagA))
85
+    .show_KoverA(object)
81 86
 })
82 87
 
83 88
 QAAdapterContamination <-
... ...
@@ -117,12 +122,13 @@ setMethod(show, "QAAdapterContamination", function(object) {
117 122
 
118 123
 QAFrequentSequence <-
119 124
     function (useFilter = TRUE, addFilter = TRUE,
120
-              n = NA_integer_, k = NA_integer_,
121
-              reportSequences = FALSE, ...) 
125
+              n = NA_integer_, a = NA_integer_,
126
+              flagK=.8, reportSequences = FALSE, ...) 
122 127
 {
123 128
     .QASummary("QAFrequentSequence",
124 129
                addFilter = addFilter, useFilter = useFilter, 
125
-               n = mkScalar(as.integer(n)), k = mkScalar(as.integer(k)), 
130
+               n = mkScalar(as.integer(n)), a = mkScalar(as.integer(a)),
131
+               flagK = mkScalar(as.numeric(flagK)),
126 132
                reportSequences = mkScalar(as.logical(reportSequences)), 
127 133
                ...)
128 134
 }
... ...
@@ -132,8 +138,9 @@ setMethod(show, "QAFrequentSequence", function(object) {
132 138
     if (!is.na(object@n))
133 139
         cat("n: ", object@n, "; ", sep="")
134 140
     else
135
-        cat("k: ", object@k, "; ", sep="")
141
+        cat("a: ", object@a, "; ", sep="")
136 142
     cat("reportSequences:", object@reportSequences, "\n")
143
+    .show_KoverA(object, object@flagK, object@a)
137 144
 })
138 145
 
139 146
 QANucleotideByCycle <- .QASummaryFactory("QANucleotideByCycle")
... ...
@@ -257,9 +264,10 @@ setMethod(qa2, "QAFastqSource",
257 264
     if (1 != length(object@con))
258 265
         .throw(SRError("InternalError",
259 266
                        "'QAFastqSource' source length != 1"))
260
-    df <- qa2(FastqSampler(object@con, object@n,
261
-                           object@readerBlockSize),
262
-              object@data, verbose=verbose)
267
+    sampler <- FastqSampler(object@con, object@n,
268
+                            object@readerBlockSize)
269
+    on.exit(close(sampler))
270
+    df <- qa2(sampler, object@data, verbose=verbose)
263 271
     values <-
264 272
         cbind(df, DataFrame(AccessTimestamp=date(),
265 273
                             FileName=basename(object@con)))
... ...
@@ -312,10 +320,11 @@ setMethod(qa2, "QAQualityUse",
312 320
     obj <- .filter(state@data, object@useFilter)
313 321
     alf <- .qa_alphabetFrequency(quality(obj), collapse=TRUE)
314 322
     alf <- alf[alf != 0]
315
-    quality <- factor(names(alf), levels=alphabet(quality(obj)))
316
-    q0 <- 1 + 32 * is(quality, "SFastqQuality")
323
+    alphabet <- alphabet(quality(obj))
324
+    quality <- factor(names(alf), levels=alphabet)
325
+    q0 <- as(do.call(class(quality(obj)), list(alphabet)), "matrix")
317 326
     values <- DataFrame(Quality=quality,
318
-                        Score=as.numeric(quality) - q0,
327
+                        Score=as.integer(q0)[quality],
319 328
                         Count=as.vector(alf))
320 329
     metadata(values) <- list(NumberOfRecords=length(obj))
321 330
     renew(object, values=values)
... ...
@@ -342,7 +351,7 @@ setMethod(qa2, "QAFrequentSequence",
342 351
         n <- thresh <- object@n
343 352
     } else {
344 353
         n <- 10L
345
-        thresh <- object@k
354
+        thresh <- object@a
346 355
     }
347 356
 
348 357
     obj <- .filter(state@data, object@useFilter)
... ...
@@ -357,7 +366,8 @@ setMethod(qa2, "QAFrequentSequence",
357 366
     } else r %in% which(t >= thresh)
358 367
     .filterUpdate(state@data, object@addFilter, filt)
359 368
 
360
-    values <- DataFrame(Threshold=thresh, Count=sum(filt),
369
+    values <- DataFrame(Threshold=thresh,
370
+                        Records=length(r), Count=sum(filt),
361 371
                         TopCount=IntegerList(topCount))
362 372
     metadata(values) <- list(NumberOfRecords=length(obj))
363 373
     renew(object, values=values)
... ...
@@ -393,10 +403,11 @@ setMethod(qa2, "QAQualityByCycle",
393 403
     if (verbose) message("qa2,QAQualityByCycle-method")
394 404
     obj <- .filter(state@data, object@useFilter)
395 405
     abc <- alphabetByCycle(quality(obj))
396
-    q <- factor(rownames(abc)[row(abc)], levels = rownames(abc))
397
-    q0 <- 1 + 32 * is(quality, "SFastqQuality")
406
+    alphabet <- rownames(abc)
407
+    q <- factor(rownames(abc)[row(abc)], levels = alphabet)
408
+    q0 <- as(do.call(class(quality(obj)), list(alphabet)), "matrix")
398 409
     values <- DataFrame(Cycle=seq_len(ncol(abc))[col(abc)],
399
-                        Quality=q, Score=as.numeric(q) - q0,
410
+                        Quality=q, Score=as.integer(q0)[q],
400 411
                         Count=as.vector(abc), row.names=NULL)
401 412
     metadata(values) <- list(NumberOfRecords=length(obj))
402 413
     renew(object, values=values[values$Count != 0,])
... ...
@@ -502,7 +513,8 @@ setMethod(flag, "QASource",
502 513
     object
503 514
 })
504 515
 
505
-setMethod(flag, "QAReadQuality", function(object, verbose=FALSE)
516
+setMethod(flag, "QAReadQuality",
517
+          function(object, ..., verbose=FALSE)
506 518
 {
507 519
     if (verbose) message("flag,QAReadQuality-method")
508 520
     df <- as(values(object), "data.frame")
... ...
@@ -519,6 +531,14 @@ setMethod(flag, "QAReadQuality", function(object, verbose=FALSE)
519 531
     object
520 532
 })
521 533
 
534
+setMethod(flag, "QAFrequentSequence",
535
+          function(object, ..., verbose=FALSE)
536
+{
537
+    ppn <- values(object)[["Count"]] / values(object)[["Records"]]
538
+    object@flag <- which(ppn > object@flagK )
539
+    object
540
+})
541
+
522 542
 ## report
523 543
 
524 544
 .hwrite <-
... ...
@@ -663,8 +683,8 @@ setMethod(report, "QASequenceUse",
663 683
 setMethod(report, "QAFrequentSequence",
664 684
           function(x, ..., dest=tempfile(), type="html")
665 685
 {
666
-    thresholdLabel <- if (is.finite(x@n)) "n" else "k"
667
-    threshold <- as.character(if (is.finite(x@n)) x@n else x@k)
686
+    thresholdLabel <- if (is.finite(x@n)) "n" else "a"
687
+    threshold <- as.character(if (is.finite(x@n)) x@n else x@a)
668 688
     freqseq <- if (x@reportSequences) {
669 689
         seqdf <- lapply(with(values(x), { #with() gets wrong env for .hwrite
670 690
             lapply(split(TopCount, Id), function(elt) {
Browse code

QA* methods

- flag as part of each QA*
- QAReadQuality flag
- QANucleotideUse update display


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@61549 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 23/12/2011 01:39:05
Showing1 changed files
... ...
@@ -63,7 +63,22 @@ QAQualityUse <- .QASummaryFactory("QAQualityUse")
63 63
 
64 64
 QASequenceUse <- .QASummaryFactory("QASequenceUse")
65 65
 
66
-QAReadQuality <- .QASummaryFactory("QAReadQuality")
66
+QAReadQuality <- 
67
+    function(useFilter=TRUE, addFilter=TRUE,
68
+             flagK=.2, flagA=30L, ...)
69
+{
70
+    .QASummary("QAReadQuality",
71
+               useFilter=useFilter, addFilter=addFilter,
72
+               flagK=mkScalar(flagK),
73
+               flagA=mkScalar(as.integer(flagA)),
74
+               ...)
75
+}
76
+
77
+setMethod(show, "QAReadQuality", function(object) {
78
+    callNextMethod()
79
+    cat(sprintf("flag: K over A = (%.2f x 100)%% over %d\n",
80
+                object@flagK, object@flagA))
81
+})
67 82
 
68 83
 QAAdapterContamination <-
69 84
     function (useFilter=TRUE, addFilter=TRUE,
... ...
@@ -298,7 +313,10 @@ setMethod(qa2, "QAQualityUse",
298 313
     alf <- .qa_alphabetFrequency(quality(obj), collapse=TRUE)
299 314
     alf <- alf[alf != 0]
300 315
     quality <- factor(names(alf), levels=alphabet(quality(obj)))
301
-    values <- DataFrame(Quality=quality, Count=as.vector(alf))
316
+    q0 <- 1 + 32 * is(quality, "SFastqQuality")
317
+    values <- DataFrame(Quality=quality,
318
+                        Score=as.numeric(quality) - q0,
319
+                        Count=as.vector(alf))
302 320
     metadata(values) <- list(NumberOfRecords=length(obj))
303 321
     renew(object, values=values)
304 322
 })
... ...
@@ -408,7 +426,8 @@ setMethod(qa2, "QACollate",
408 426
 
409 427
     ## collapse summary
410 428
     df <- do.call(rbind, Map(function(elt) values(elt@src), qas))
411
-    df[["Id"]] <- seq_along(qas)
429
+    df[["Id"]] <- factor(seq_along(qas),
430
+                         levels=as.character(seq_along(qas)))
412 431
     ncol <- ncol(df)
413 432
     values(object@src) <- df[, c(ncol, seq_len(ncol - 1L))]
414 433
 
... ...
@@ -438,25 +457,27 @@ setMethod(qa2, "QACollate",
438 457
     }, qas))
439 458
 
440 459
     ## flag
441
-    flag <- Reduce(rbind, Map(function(x, verbose) {
442
-        nm <- class(x)
443
-        f <- flag(x, verbose=verbose)
444
-        if (length(f)) DataFrame(Flag=f, Summary=nm)
460
+    object@src <- flag(object@src, verbose=verbose)
461
+    values <- lapply(values, flag, verbose=verbose)
462
+
463
+    flagged <- Reduce(rbind, Map(function(x) {
464
+        f <- x@flag
465
+        if (length(f)) DataFrame(Flag=f, Summary=class(x))
445 466
         else DataFrame(Flag=integer(), Summary=character())
446
-    }, c(list(object@src), values), MoreArgs=list(verbose=verbose)))
467
+    }, c(list(object@src), values)))
447 468
 
448 469
     QA(object@src, QAFiltered(values=filtered),
449
-       QAFlagged(values=flag),
470
+       QAFlagged(values=flagged),
450 471
        do.call(SimpleList, values))
451 472
 })
452 473
 
453 474
 ## flag
454 475
 
455
-setMethod(flag, "ANY",
476
+setMethod(flag, ".QA2",
456 477
           function(object, ..., verbose=FALSE)
457 478
 {
458 479
     if (verbose) message("flag,ANY-method")
459
-    integer()
480
+    object
460 481
 })
461 482
 
462 483
 setMethod(flag, "QASource",
... ...
@@ -467,20 +488,35 @@ setMethod(flag, "QASource",
467 488
     rng <- object@flagNSequencesRange
468 489
     x <- values(object)[["SourceN"]]
469 490
 
470
-    out <- if (1L == length(rng) && is.na(rng)) {
491
+    if (1L == length(rng) && is.na(rng)) {
471 492
         ## default -- outliers
472 493
         stats <- stats::fivenum(x, na.rm = TRUE)
473 494
         iqr <- diff(stats[c(2, 4)])
474 495
         coef <- 1.5
475
-        if (!is.na(iqr)) {
476
-            x < (stats[2L] - coef * iqr) |
477
-            x > (stats[4L] + coef * iqr)
478
-        } else !is.finite(x)
479
-    } else {
480
-        !is.finite(x) | x < rng[1] | x > rng[2]
496
+        object@flagNSequencesRange <- rng <-
497
+            c(as.integer(floor(stats[2L] - coef * iqr)),
498
+              as.integer(ceiling(stats[4L] + coef * iqr)))
481 499
     }
482 500
 
483
-    which(out)
501
+    object@flag <- which(!is.finite(x) | x < rng[1] | x > rng[2])
502
+    object
503
+})
504
+
505
+setMethod(flag, "QAReadQuality", function(object, verbose=FALSE)
506
+{
507
+    if (verbose) message("flag,QAReadQuality-method")
508
+    df <- as(values(object), "data.frame")
509
+    object@flag <- which(unname(unlist(with(df, {
510
+        Map(function(score, density, A, K) {
511
+            dx <- diff(score)
512
+            x <- score[-length(score)] + dx / 2
513
+            y <- density[-length(density)] + diff(density) / 2
514
+            k <- approxfun(x, cumsum(y * dx))(A)
515
+            is.na(k) || k < K
516
+        }, split(Score, Id), split(Density, Id),
517
+            MoreArgs=list(A = object@flagA, K = object@flagK))
518
+    }))))
519
+    object
484 520
 })
485 521
 
486 522
 ## report
... ...
@@ -494,8 +530,19 @@ setMethod(flag, "QASource",
494 530
 setMethod(report, "QASource",
495 531
           function(x, ..., dest=tempfile(), type="html")
496 532
 {
497
-    plt <- dotplot(Id ~ SourceN, as(values(x), "data.frame"),
498
-                   type = "b", pch = 20, col = .dnaCol)
533
+    df <- as(values(x), "data.frame")
534
+    df$Id <- as.integer(as.character(df$Id))
535
+    plt <-
536
+        dotplot(Id ~ SourceN, df,
537
+                type = "b", pch = 20, col = .dnaCol,
538
+                rng = x@flagNSequencesRange,
539
+                rngcol = brewer.pal(9, "RdYlBu")[c(1, 9)],
540
+                panel=function(x, y, ..., rng, rngcol) {
541
+                    panel.dotplot(x, y, ...)
542
+                    yy <- c(min(y), max(y))
543
+                    llines(rng[1], yy, col=rngcol[1], lty=2)
544
+                    llines(rng[2], yy, col=rngcol[2], lty=2)
545
+                })
499 546
     list(SAMPLE_KEY=.hwrite(values(x)),
500 547
          PPN_COUNT=.html_img(dest, "readCounts", plt))
501 548
 })
... ...
@@ -521,12 +568,19 @@ setMethod(report, "QAAdapterContamination",
521 568
 setMethod(report, "QANucleotideUse",
522 569
           function(x, ..., dest=tempfile(), type="html")
523 570
 {
524
-    plt <- dotplot(Id ~ Count, group = Nucleotide,
525
-                   as(values(x), "data.frame"),
526
-                   type = "b", pch = 20, col = .dnaCol,
527
-                   key = list(space = "top", lines = list(col = .dnaCol), 
528
-                     text = list(lab = levels(values(x)[["Nucleotide"]])),
529
-                       columns = 5L))
571
+    df <- as(values(x), "data.frame")
572
+    df$Id <- as.integer(as.character(df$Id))
573
+    plt <-
574
+        dotplot(Id ~ Count|factor(ifelse(df$Nucleotide == "N", "N", "O")),
575
+                group=Nucleotide, df,
576
+                base=df$Nucleotide,
577
+                type = "b", pch = 20, col = .dnaCol,
578
+                key = list(space = "top", lines = list(col = .dnaCol), 
579
+                  text = list(lab = levels(values(x)[["Nucleotide"]])),
580
+                  columns = 5L),
581
+                strip=FALSE, scale=list(relation="free"),
582
+                par.settings=list(layout.widths = list(panel = c(1, 2))))
583
+
530 584
     list(BASE_CALL_COUNT=.html_img(dest, "baseCalls", plt))
531 585
 })
532 586
 
... ...
@@ -551,7 +605,7 @@ setMethod(report, "QAQualityUse",
551 605
                    key = list(space = "top",
552 606
                      lines = list(col = col, size=3L), 
553 607
                      text = list(lab = levels(df[["Quality"]])),
554
-                     columns = 10L, cex=.6))
608
+                     columns = min(length(col), 10L), cex=.6))
555 609
     list(QUALITY_SCORE_COUNT=.html_img(dest, "qualityCalls", plt))
556 610
 })
557 611
 
... ...
@@ -559,16 +613,21 @@ setMethod(report, "QAReadQuality",
559 613
     function(x, ..., dest=tempfile(), type="html")
560 614
 {
561 615
     df <- as(values(x), "data.frame")
616
+    lvl <- levels(df$Id)
617
+    flag <- lvl[x@flag]
618
+    df$Id <- factor(df$Id, levels=c(lvl[!lvl %in% flag], flag))
562 619
     xmin <- min(df$Score)
563 620
     ymax <- max(df$Density)
564
-    plt <- xyplot(Density ~ Score | Id, df,
565
-                  type = "l", xlab = "Average (calibrated) base quality", 
566
-                  ylab = "Proportion of reads", aspect = 2,
567
-                  panel = function(..., subscripts) {
568
-                      panel.xyplot(...)
569
-                      lbl <- as.character(unique(df$Id[subscripts]))
570
-                      ltext(xmin, ymax, lbl, adj = c(0, 1))
571
-                  }, strip = FALSE)
621
+    col <- c(rep("gray", length(lvl) - length(flag)),
622
+             brewer.pal(8, "Set1")[1 + (seq_along(flag) - 1) %% 8])
623
+    plt <-
624
+        xyplot(Density ~ Score, group=Id, df,
625
+               type = "l", xlab = "Average (calibrated) base quality", 
626
+               nylab = "Proportion of reads", col = col, strip=FALSE,
627
+               key = list(space = "top",
628
+                 lines = list(col=tail(col, length(flag)), size=3L, lwd=2),
629
+                 text = list(lab=tail(lvl, length(flag))),
630
+                 columns=min(length(col), 10L), cex=.6))
572 631
     list(READ_QUALITY_FIGURE=.html_img(dest, "readQuality", plt))
573 632
 })
574 633
 
Browse code

start flag,QA*-methods

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@61522 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 21/12/2011 19:40:09
Showing1 changed files
... ...
@@ -21,23 +21,6 @@ setMethod(.filter, "QAData", function(object, useFilter, ...) {
21 21
     object
22 22
 }
23 23
 
24
-## QASource
25
-
26
-QAFastqSource <-
27
-    function(con=character(), n=1e6, readerBlockSize=1e8, ...)
28
-{
29
-    new("QAFastqSource", con=as.character(con),
30
-        n=mkScalar(as.integer(n)),
31
-        readerBlockSize=mkScalar(as.integer(readerBlockSize)),
32
-        ...)
33
-}
34
-
35
-setMethod(show, "QAFastqSource", function(object) {
36
-    callNextMethod()
37
-    cat("n: ", object@n, ";",
38
-        " readerBlockSize: ", object@readerBlockSize, "\n", sep="")
39
-})
40
-
41 24
 ## QASummary
42 25
 
43 26
 .QASummary <- 
... ...
@@ -65,11 +48,12 @@ setMethod(show, "QAFastqSource", function(object) {
65 48
 
66 49
 setMethod(show, "QASummary", function(object) {
67 50
     callNextMethod()
51
+    cat(Rsamtools:::.ppath("html template", object@html))
68 52
     cat("useFilter: ", object@useFilter, "; ",
69 53
         "addFilter: ", object@addFilter, "\n", sep="")
70 54
 })
71 55
 
72
-QASources <- .QASummaryFactory("QASources")
56
+QAFlagged <- .QASummaryFactory("QAFlagged")
73 57
 
74 58
 QAFiltered <- .QASummaryFactory("QAFiltered")
75 59
 
... ...
@@ -141,6 +125,28 @@ QANucleotideByCycle <- .QASummaryFactory("QANucleotideByCycle")
141 125
 
142 126
 QAQualityByCycle <- .QASummaryFactory("QAQualityByCycle")
143 127
 
128
+## QASource
129
+
130
+QAFastqSource <-
131
+    function(con=character(), n=1e6, readerBlockSize=1e8,
132
+             flagNSequencesRange=NA_integer_, ...,
133
+             html=system.file("template", "QASources.html",
134
+               package="ShortRead"))
135
+{
136
+    .QASummary("QAFastqSource", con=as.character(con),
137
+        n=mkScalar(as.integer(n)),
138
+        readerBlockSize=mkScalar(as.integer(readerBlockSize)),
139
+        flagNSequencesRange=as.integer(flagNSequencesRange),
140
+        ..., html=mkScalar(html))
141
+}
142
+
143
+setMethod(show, "QAFastqSource", function(object) {
144
+    callNextMethod()
145
+    cat("length:", length(object@con), "\n")
146
+    cat("n: ", object@n, ";",
147
+        " readerBlockSize: ", object@readerBlockSize, "\n", sep="")
148
+})
149
+
144 150
 ## QACollate
145 151
 
146 152
 setMethod(QACollate, "missing",
... ...
@@ -171,9 +177,10 @@ setMethod(show, "QACollate", function(object) {
171 177
 ## QA
172 178
 
173 179
 QA <- 
174
-    function (sources, filtered, ...) 
180
+    function (src, filtered, flagged, ...) 
175 181
 {
176
-    new("QA", sources = sources, filtered = filtered, ...)
182
+    new("QA", src = src, filtered = filtered, flagged=flagged,
183
+        ...)
177 184
 }
178 185
 
179 186
 ## .clone
... ...
@@ -400,12 +407,10 @@ setMethod(qa2, "QACollate",
400 407
     }, object, ..., verbose=verbose)
401 408
 
402 409
     ## collapse summary
403
-    srcValues <- do.call(rbind, lapply(qas, function(elt) {
404
-        values(elt@src)
405
-    }))
406
-    srcValues[["Id"]] <- seq_along(qas)
407
-    ncol <- ncol(srcValues)
408
-    srcValues <- srcValues[, c(ncol, seq_len(ncol - 1L))]
410
+    df <- do.call(rbind, Map(function(elt) values(elt@src), qas))
411
+    df[["Id"]] <- seq_along(qas)
412
+    ncol <- ncol(df)
413
+    values(object@src) <- df[, c(ncol, seq_len(ncol - 1L))]
409 414
 
410 415
     ## collect NumberOfRecords
411 416
     filtered <- as(t(sapply(qas, function(lst) {
... ...
@@ -413,7 +418,7 @@ setMethod(qa2, "QACollate",
413 418
             metadata(values(elt))[["NumberOfRecords"]]
414 419
         })
415 420
     })), "DataFrame")
416
-    filtered[["Id"]] <- srcValues[["Id"]]
421
+    filtered[["Id"]] <- values(object@src)[["Id"]]
417 422
     ncol <- ncol(filtered)
418 423
     filtered <- filtered[,c(ncol, seq_len(ncol - 1L))]
419 424
 
... ...
@@ -425,17 +430,59 @@ setMethod(qa2, "QACollate",
425 430
         rotate <- c(ncol(df), seq_len(ncol - 1L))
426 431
         values(elt) <- df[,rotate]
427 432
         elt
428
-    }, id), qas, srcValues[["Id"]])
433
+    }, id), qas, values(object@src)[["Id"]])
429 434
 
430 435
     ## collapse
431
-    values <- do.call(mapply, c(function(...) {
436
+    values <- do.call(Map, c(function(...) {
432 437
         do.call(rbind, list(...))
433 438
     }, qas))
434 439
 
435
-    QA(QASources(values=srcValues), QAFiltered(values=filtered),
440
+    ## flag
441
+    flag <- Reduce(rbind, Map(function(x, verbose) {
442
+        nm <- class(x)
443
+        f <- flag(x, verbose=verbose)
444
+        if (length(f)) DataFrame(Flag=f, Summary=nm)
445
+        else DataFrame(Flag=integer(), Summary=character())
446
+    }, c(list(object@src), values), MoreArgs=list(verbose=verbose)))
447
+
448
+    QA(object@src, QAFiltered(values=filtered),
449
+       QAFlagged(values=flag),
436 450
        do.call(SimpleList, values))
437 451
 })
438 452
 
453
+## flag
454
+
455
+setMethod(flag, "ANY",
456
+          function(object, ..., verbose=FALSE)
457
+{
458
+    if (verbose) message("flag,ANY-method")
459
+    integer()
460
+})
461
+
462
+setMethod(flag, "QASource",
463
+          function(object, ..., verbose=FALSE)
464
+{
465
+    if (verbose) message("flag,QASource-method")
466
+
467
+    rng <- object@flagNSequencesRange
468
+    x <- values(object)[["SourceN"]]
469
+
470
+    out <- if (1L == length(rng) && is.na(rng)) {
471
+        ## default -- outliers
472
+        stats <- stats::fivenum(x, na.rm = TRUE)
473
+        iqr <- diff(stats[c(2, 4)])
474
+        coef <- 1.5
475
+        if (!is.na(iqr)) {
476
+            x < (stats[2L] - coef * iqr) |
477
+            x > (stats[4L] + coef * iqr)
478
+        } else !is.finite(x)
479
+    } else {
480
+        !is.finite(x) | x < rng[1] | x > rng[2]
481
+    }
482
+
483
+    which(out)
484
+})
485
+
439 486
 ## report
440 487
 
441 488
 .hwrite <-
... ...
@@ -444,7 +491,7 @@ setMethod(qa2, "QACollate",
444 491
     hwrite(as(df, "data.frame"), border=0)
445 492
 }
446 493
 
447
-setMethod(report, "QASources",
494
+setMethod(report, "QASource",
448 495
           function(x, ..., dest=tempfile(), type="html")
449 496
 {
450 497
     plt <- dotplot(Id ~ SourceN, as(values(x), "data.frame"),
... ...
@@ -453,6 +500,12 @@ setMethod(report, "QASources",
453 500
          PPN_COUNT=.html_img(dest, "readCounts", plt))
454 501
 })
455 502
 
503
+setMethod(report, "QAFlagged",
504
+          function(x, ...., dest=tempfile(), type="html")
505
+{
506
+    list(FLAGGED=.hwrite(values(x)))
507
+})
508
+
456 509
 setMethod(report, "QAFiltered",
457 510
           function(x, ..., dest=tempfile(), type="html")
458 511
 {
... ...
@@ -643,15 +696,18 @@ setMethod(report, "QA", function(x, ..., dest=tempfile(), type="html") {
643 696
         .throw(SRError("UserArgumentMismatch", "'type' must be 'html'"))
644 697
     dir.create(dest, recursive=TRUE)
645 698
 
646
-    sections <- c(system.file("template", "QAHeader.html",
647
-                              package="ShortRead", mustWork=TRUE),
648
-                  x@sources@html, x@filtered@html,
699
+    header <- system.file("template", "QAHeader.html",
700
+                       package="ShortRead", mustWork=TRUE)
701
+    footer <- system.file("template", "QAFooter.html",
702
+                          package="ShortRead", mustWork=TRUE)
703
+    sections <- c(header,
704
+                  x@src@html, x@filtered@html, x@flagged@html,
649 705
                   sapply(x, slot, "html"),
650
-                  system.file("template", "QAFooter.html",
651
-                              package="ShortRead", mustWork=TRUE))
706
+                  footer)
652 707
 
653
-    values0 <- c(list(report(x@sources, dest=dest),
654
-                      report(x@filtered, dest=dest)),
708
+    values0 <- c(list(report(x@src, dest=dest),
709
+                      report(x@filtered, dest=dest),
710
+                      report(x@flagged, dest=dest)),
655 711
                  lapply(x, report, dest=dest))
656 712
     values <- setNames(unlist(values0, recursive=FALSE, use.names=FALSE),
657 713
                        unlist(lapply(values0, names)))
Browse code

work on updated approach to QA reports

- equivalent of qa() on Fastq files


git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/ShortRead@61496 bc3139a8-67e5-0310-9ffc-ced21a209358

Martin Morgan authored on 20/12/2011 21:10:19
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,660 @@
1
+## QAData
2
+
3
+QAData <-
4
+    function(seq=ShortReadQ(), filter=logical(length(seq)), ...)
5
+{
6
+    .QAData$new(seq=seq, filter=filter, ...)
7
+}
8
+
9
+setMethod(.filter, "QAData", function(object, useFilter, ...) {
10
+    if (useFilter)
11
+        object$seq[!object$filter]
12
+    else
13
+        object$seq
14
+})
15
+
16
+.filterUpdate <-
17
+    function(object, add, value)
18
+{
19
+    if (add)
20
+        object$filter <- object$filter | value
21
+    object
22
+}
23
+
24
+## QASource
25
+
26
+QAFastqSource <-
27
+    function(con=character(), n=1e6, readerBlockSize=1e8, ...)
28
+{
29
+    new("QAFastqSource", con=as.character(con),
30
+        n=mkScalar(as.integer(n)),
31
+        readerBlockSize=mkScalar(as.integer(readerBlockSize)),
32
+        ...)
33
+}
34
+
35
+setMethod(show, "QAFastqSource", function(object) {
36
+    callNextMethod()
37
+    cat("n: ", object@n, ";",
38
+        " readerBlockSize: ", object@readerBlockSize, "\n", sep="")
39
+})
40
+
41
+## QASummary
42
+
43
+.QASummary <- 
44
+    function (class, useFilter = TRUE, addFilter = TRUE, ..., html) 
45
+{
46
+    if (missing(html)) 
47
+        html <- file.path(system.file("template", package = "ShortRead"), 
48
+                          sprintf("%s.html", class))
49
+    if (!is.na(html) && (!file.exists(html) || !nzchar(html))) 
50
+        .throw(SRError("UserArgumentMismatch",
51
+                       "'html' file does not exist:\n    %s",
52
+                       html))
53
+    new(class, useFilter = mkScalar(as.logical(useFilter)),
54
+        addFilter = mkScalar(as.logical(addFilter)), 
55
+        html = mkScalar(html), ...)
56
+}
57
+
58
+.QASummaryFactory <-
59
+    function(summaryName)
60
+{
61
+    function (useFilter = TRUE, addFilter = TRUE, ...) 
62
+        .QASummary(summaryName, useFilter = useFilter,
63
+                   addFilter = addFilter, ...)
64
+}
65
+
66
+setMethod(show, "QASummary", function(object) {
67
+    callNextMethod()
68
+    cat("useFilter: ", object@useFilter, "; ",
69
+        "addFilter: ", object@addFilter, "\n", sep="")
70
+})
71
+
72
+QASources <- .QASummaryFactory("QASources")
73
+
74
+QAFiltered <- .QASummaryFactory("QAFiltered")
75
+
76
+QANucleotideUse <- .QASummaryFactory("QANucleotideUse")
77
+
78
+QAQualityUse <- .QASummaryFactory("QAQualityUse")
79
+
80
+QASequenceUse <- .QASummaryFactory("QASequenceUse")
81
+
82
+QAReadQuality <- .QASummaryFactory("QAReadQuality")
83
+
84
+QAAdapterContamination <-
85
+    function (useFilter=TRUE, addFilter=TRUE,
86
+              Lpattern = NA_character_, Rpattern = NA_character_, 
87
+              max.Lmismatch = 0.1, max.Rmismatch = 0.2, min.trim = 9L,
88
+              ...)
89
+{
90
+    fmt <- "QAAdapterContamination not a DNA sequence\n  %s=\"%s\""
91
+    if (!is.na(Lpattern)) 
92
+        tryCatch(DNAString(Lpattern), error = function(e) {
93
+            .throw(SRError("UserArgumentMismatch", fmt, "Lpattern", 
94
+                Lpattern))
95
+        })
96
+    if (!is.na(Rpattern)) 
97
+        tryCatch(DNAString(Rpattern), error = function(e) {
98
+            .throw(SRError("UserArgumentMismatch", fmt, "Rpattern", 
99
+                Rpattern))
100
+        })
101
+    .QASummary("QAAdapterContamination",
102
+               useFilter=useFilter, addFilter=addFilter,
103
+               Lpattern = mkScalar(toupper(as.character(Lpattern))), 
104
+               Rpattern = mkScalar(toupper(as.character(Rpattern))), 
105
+               max.Lmismatch = mkScalar(as.numeric(max.Lmismatch)), 
106
+               max.Rmismatch = mkScalar(as.numeric(max.Rmismatch)), 
107
+               min.trim = mkScalar(as.integer(min.trim)), ...)
108
+}
109
+
110
+setMethod(show, "QAAdapterContamination", function(object) {
111
+    callNextMethod()
112
+    cat("Lpattern:", object@Lpattern, "\n")
113
+    cat("Rpattern:", object@Rpattern, "\n")
114
+    cat("max.Lmismatch: ", object@max.Lmismatch, "; ",
115
+        "max.Rmismatch: ", object@max.Rmismatch, "; ",
116
+        "min.trim: ", object@min.trim, "\n", sep="")
117
+})
118
+
119
+QAFrequentSequence <-
120
+    function (useFilter = TRUE, addFilter = TRUE,
121
+              n = NA_integer_, k = NA_integer_,
122
+              reportSequences = FALSE, ...) 
123
+{
124
+    .QASummary("QAFrequentSequence",
125
+               addFilter = addFilter, useFilter = useFilter, 
126
+               n = mkScalar(as.integer(n)), k = mkScalar(as.integer(k)), 
127
+               reportSequences = mkScalar(as.logical(reportSequences)), 
128
+               ...)
129
+}
130
+
131
+setMethod(show, "QAFrequentSequence", function(object) {
132
+    callNextMethod()
133
+    if (!is.na(object@n))
134
+        cat("n: ", object@n, "; ", sep="")
135
+    else
136
+        cat("k: ", object@k, "; ", sep="")
137
+    cat("reportSequences:", object@reportSequences, "\n")
138
+})
139
+
140
+QANucleotideByCycle <- .QASummaryFactory("QANucleotideByCycle")
141
+
142
+QAQualityByCycle <- .QASummaryFactory("QAQualityByCycle")
143
+
144
+## QACollate
145
+
146
+setMethod(QACollate, "missing",
147
+          function(src, ...)
148
+{
149
+    QACollate(QAFastqSource(), ...)
150
+})
151
+
152
+setMethod(QACollate, "QAFastqSource",
153
+          function(src, ...)
154
+{
155
+    if (1L == length(list(...)) && is(..1, "QACollate"))
156
+        renew(..1, src=src)
157
+    else
158
+        new("QACollate", src=src, SimpleList(...))
159
+})
160
+
161
+setMethod(show, "QACollate", function(object) {
162
+    callNextMethod()
163
+    cat("source:", class(object@src),
164
+        "of length", length(object@src@con), "\n")
165
+    elts <- paste(sapply(object, class), collapse = " ")
166
+    txt <- paste(strwrap(sprintf("elements: %s", elts), exdent = 2), 
167
+                 collapse = "\n  ")
168
+    cat(txt, "\n")
169
+})
170
+
171
+## QA
172
+
173
+QA <- 
174
+    function (sources, filtered, ...) 
175
+{
176
+    new("QA", sources = sources, filtered = filtered, ...)
177
+}
178
+
179
+## .clone
180
+
181
+setMethod(.clone, "QAData",
182
+          function (object, ...) 
183
+{
184
+    .QAData$new(seq = object$seq, filter = object$filter, ...)
185
+})
186
+
187
+setMethod(.clone, "QASource",
188
+          function (object, ...) 
189
+{
190
+    object@data <- .clone(object@data)
191
+    object
192
+})
193
+
194
+## values
195
+
196
+setMethod(values, "QASummary",
197
+          function(x, ...)
198
+{
199
+    x@values
200
+})
201
+
202
+setReplaceMethod("values", c("QASummary", "DataFrame"),
203
+                 function (x, ..., value) 
204
+{
205
+    x@values <- value
206
+    x
207
+})
208
+
209
+## rbind
210
+
211
+setMethod(rbind, "QASummary",
212
+          function(..., deparse.level=1)
213
+{
214
+    class <- class(..1)
215
+    values <- do.call(rbind, lapply(list(...), values))
216
+    renew(..1, values = values)
217
+})
218
+
219
+## qa2
220
+
221
+setMethod(qa2, "FastqSampler",
222
+          function(object, state, ..., verbose=FALSE)
223
+{
224
+    if (verbose) message("qa2,FastqSampler-method")
225
+    state$seq <- yield(object)
226
+    state$filter <- rep(FALSE, length(state$seq))
227
+    DataFrame(SourceN=object$status()[["total"]],
228
+              SampleN=length(state$seq))
229
+})
230
+
231
+setMethod(qa2, "QAFastqSource",
232
+          function(object, state, ..., verbose=FALSE) 
233
+{
234
+    if (verbose) message("qa2,QAFastqSource-method")
235
+    if (1 != length(object@con))
236
+        .throw(SRError("InternalError",
237
+                       "'QAFastqSource' source length != 1"))
238
+    df <- qa2(FastqSampler(object@con, object@n,
239
+                           object@readerBlockSize),
240
+              object@data, verbose=verbose)
241
+    values <-
242
+        cbind(df, DataFrame(AccessTimestamp=date(),
243
+                            FileName=basename(object@con)))
244
+                            ## Path=dirname(path(object@con))))
245
+    metadata(values) <- list(NumberOfRecords=length(object@data$seq))
246
+    renew(object, values=values)
247
+})
248
+
249
+setMethod(qa2, "QAAdapterContamination",
250
+          function(object, state, ..., verbose=FALSE)
251
+{
252
+    if (verbose) message("qa2,QAAdapterContamination-method")
253
+    obj <- .filter(state@data, object@useFilter)
254
+
255
+    Lpattern <-
256
+        if (is.na(object@Lpattern)) "" else object@Lpattern
257
+    Rpattern <-
258
+        if (is.na(object@Rpattern)) "" else object@Rpattern
259
+
260
+    trim <- trimLRPatterns(Lpattern, Rpattern, sread(obj),
261
+                           object@max.Lmismatch,
262
+                           object@max.Rmismatch,
263
+                           ranges=TRUE)
264
+    filt <- width(trim) < (width(obj) - object@min.trim)
265
+
266
+    .filterUpdate(state@data, object@addFilter, filt)
267
+    values <- DataFrame(Contaminants=sum(filt))
268
+    metadata(values) <- list(NumberOfRecords=length(filt))
269
+    renew(object, values=values)
270
+})
271
+
272
+setMethod(qa2, "QANucleotideUse",
273
+          function(object, state, ..., verbose=FALSE)
274
+{
275
+    if (verbose) message("qa2,QANucleotideUse-method")
276
+    obj <- .filter(state@data, object@useFilter)
277
+    alf <- .qa_alphabetFrequency(sread(obj), baseOnly=TRUE,
278
+                                 collapse=TRUE)
279
+    values <- DataFrame(Nucleotide=factor(sub("other", "N", names(alf)),
280
+                          levels=c("A", "C", "G", "T", "N")),
281
+                         Count=as.vector(alf))
282
+    metadata(values) <- list(NumberOfRecords=length(obj))
283
+    renew(object, values=values)
284
+})
285
+
286
+setMethod(qa2, "QAQualityUse",
287
+          function(object, state, ..., verbose=FALSE)
288
+{
289
+    if (verbose) message("qa2,QAQualityUse-method")
290
+    obj <- .filter(state@data, object@useFilter)
291
+    alf <- .qa_alphabetFrequency(quality(obj), collapse=TRUE)
292
+    alf <- alf[alf != 0]
293
+    quality <- factor(names(alf), levels=alphabet(quality(obj)))
294
+    values <- DataFrame(Quality=quality, Count=as.vector(alf))
295
+    metadata(values) <- list(NumberOfRecords=length(obj))
296
+    renew(object, values=values)
297
+})
298
+
299
+setMethod(qa2, "QASequenceUse",
300
+          function(object, state, ..., verbose=FALSE)
301
+{
302
+    if (verbose) message("qa2,QASequenceUse-method")
303
+    obj <- .filter(state@data, object@useFilter)
304
+    t <- tabulate(tabulate(srrank(sread(obj))))
305
+    values <- DataFrame(Occurrences=seq_along(t)[t!=0],
306
+                        Reads=t[t!=0])
307
+    metadata(values) <- list(NumberOfRecords=length(obj))
308
+    renew(object, values=values)
309
+})
310
+
311
+setMethod(qa2, "QAFrequentSequence",
312
+          function(object, state, ..., verbose=FALSE)
313
+{
314
+    if (verbose) message("qa2,QAFrequentSequence-method")
315
+
316
+    if (is.finite(object@n)) {
317
+        n <- thresh <- object@n
318
+    } else {
319
+        n <- 10L
320
+        thresh <- object@k
321
+    }
322
+
323
+    obj <- .filter(state@data, object@useFilter)
324
+    r <- srrank(sread(obj))
325
+    t <- tabulate(r)
326
+    ttop <- head(order(t, decreasing=TRUE), n)
327
+    topCount <-
328
+        setNames(t[ttop], as.character(sread(obj)[match(ttop, r)]))
329
+
330
+    filt <- if (is.finite(object@n)) {
331
+        r %in% ttop
332
+    } else r %in% which(t >= thresh)
333
+    .filterUpdate(state@data, object@addFilter, filt)
334
+
335
+    values <- DataFrame(Threshold=thresh, Count=sum(filt),
336
+                        TopCount=IntegerList(topCount))
337
+    metadata(values) <- list(NumberOfRecords=length(obj))
338
+    renew(object, values=values)
339
+})
340
+
341
+setMethod(qa2, "QAReadQuality",
342
+          function(object, state, ..., verbose=FALSE)
343
+{
344
+    if (verbose) message("qa2,QAReadQuality-method")
345
+    obj <- .filter(state@data, object@useFilter)
346
+    dens <- .qa_qdensity(quality(obj))
347
+    values <- DataFrame(Score=dens$x, Density=dens$y)
348
+    metadata(values) <- list(NumberOfRecords=length(obj))
349
+    renew(object, values=values)
350
+})
351
+
352
+setMethod(qa2, "QANucleotideByCycle",
353
+          function(object, state, ..., verbose=FALSE)
354
+{
355
+    if (verbose) message("qa2,QANucleotideByCycle-method")
356
+    obj <- .filter(state@data, object@useFilter)
357
+    abc <- alphabetByCycle(sread(obj))
358
+    values <- DataFrame(Cycle=seq_len(ncol(abc))[col(abc)],
359
+                         Base=factor(rownames(abc)[row(abc)]),
360
+                         Count=as.vector(abc), row.names=NULL)
361
+    metadata(values) <- list(NumberOfRecords=length(obj))
362
+    renew(object, values=values[values$Count != 0,])
363
+})
364
+
365
+setMethod(qa2, "QAQualityByCycle",
366
+          function(object, state, ..., verbose=FALSE)
367
+{
368
+    if (verbose) message("qa2,QAQualityByCycle-method")
369
+    obj <- .filter(state@data, object@useFilter)
370
+    abc <- alphabetByCycle(quality(obj))
371
+    q <- factor(rownames(abc)[row(abc)], levels = rownames(abc))
372
+    q0 <- 1 + 32 * is(quality, "SFastqQuality")
373
+    values <- DataFrame(Cycle=seq_len(ncol(abc))[col(abc)],
374
+                        Quality=q, Score=as.numeric(q) - q0,
375
+                        Count=as.vector(abc), row.names=NULL)
376
+    metadata(values) <- list(NumberOfRecords=length(obj))
377
+    renew(object, values=values[values$Count != 0,])
378
+})
379
+
380
+.qa2_do_collate1 <-
381
+    function(object, state, ..., verbose=FALSE)
382
+{
383
+    if (verbose) message("qa2,QACollate1-method")
384
+    src <- .clone(object@src)
385
+    srcelt <- qa2(src, verbose=verbose) # side effect -- populate seq
386
+    elts <- endoapply(as(object, "SimpleList"), qa2, src, ...,
387
+                      verbose=verbose)
388
+    names(elts) <- sapply(object, class)
389
+    renew(object, elts, src=renew(object@src, values=values(srcelt)))
390
+}
391
+
392
+setMethod(qa2, "QACollate",
393
+    function(object, state, ..., verbose=FALSE)
394
+{
395
+    if (verbose) message("qa2,QACollate-method")
396
+
397
+    qas <- srapply(seq_along(object@src@con), function(i, object, ...) {
398
+        object@src@con <- object@src@con[i]
399
+        .qa2_do_collate1(object, ...)
400
+    }, object, ..., verbose=verbose)
401
+
402
+    ## collapse summary
403
+    srcValues <- do.call(rbind, lapply(qas, function(elt) {
404
+        values(elt@src)
405
+    }))
406
+    srcValues[["Id"]] <- seq_along(qas)
407
+    ncol <- ncol(srcValues)
408
+    srcValues <- srcValues[, c(ncol, seq_len(ncol - 1L))]
409
+
410
+    ## collect NumberOfRecords
411
+    filtered <- as(t(sapply(qas, function(lst) {
412
+        sapply(lst, function(elt) {
413
+            metadata(values(elt))[["NumberOfRecords"]]
414
+        })
415
+    })), "DataFrame")
416
+    filtered[["Id"]] <- srcValues[["Id"]]
417
+    ncol <- ncol(filtered)
418
+    filtered <- filtered[,c(ncol, seq_len(ncol - 1L))]
419
+
420
+    ## add Id
421
+    qas <- Map(function(elt, id) endoapply(elt, function(elt, id) {
422
+        df <- values(elt)
423
+        df[["Id"]] <- id
424
+        ncol <- ncol(df)
425
+        rotate <- c(ncol(df), seq_len(ncol - 1L))
426
+        values(elt) <- df[,rotate]
427
+        elt
428
+    }, id), qas, srcValues[["Id"]])
429
+
430
+    ## collapse
431
+    values <- do.call(mapply, c(function(...) {
432
+        do.call(rbind, list(...))
433
+    }, qas))
434
+
435
+    QA(QASources(values=srcValues), QAFiltered(values=filtered),
436
+       do.call(SimpleList, values))
437
+})
438
+
439
+## report
440
+
441
+.hwrite <-
442
+    function(df)
443
+{
444
+    hwrite(as(df, "data.frame"), border=0)
445
+}
446
+
447
+setMethod(report, "QASources",
448
+          function(x, ..., dest=tempfile(), type="html")
449
+{
450
+    plt <- dotplot(Id ~ SourceN, as(values(x), "data.frame"),
451
+                   type = "b", pch = 20, col = .dnaCol)
452
+    list(SAMPLE_KEY=.hwrite(values(x)),
453
+         PPN_COUNT=.html_img(dest, "readCounts", plt))
454
+})
455
+
456
+setMethod(report, "QAFiltered",
457
+          function(x, ..., dest=tempfile(), type="html")
458
+{
459
+    list(FILTERED=.hwrite(values(x)))
460
+})
461
+
462
+setMethod(report, "QAAdapterContamination",
463
+          function(x, ..., dest=tempfile(), type="html")
464
+{
465
+    list(ADAPTER_CONTAMINATION=.hwrite(values(x)))
466
+})
467
+
468
+setMethod(report, "QANucleotideUse",
469
+          function(x, ..., dest=tempfile(), type="html")
470
+{
471
+    plt <- dotplot(Id ~ Count, group = Nucleotide,
472
+                   as(values(x), "data.frame"),
473
+                   type = "b", pch = 20, col = .dnaCol,
474
+                   key = list(space = "top", lines = list(col = .dnaCol), 
475
+                     text = list(lab = levels(values(x)[["Nucleotide"]])),
476
+                       columns = 5L))
477
+    list(BASE_CALL_COUNT=.html_img(dest, "baseCalls", plt))
478
+})
479
+
480
+setMethod(report, "QAQualityUse",
481
+          function(x, ..., dest=tempfile(), type="html")
482
+{
483
+    df <- as(values(x), "data.frame")
484
+    id <- df[["Id"]]
485
+    q <- df[["Quality"]]
486
+    q <- factor(q, levels=levels(q)[min(as.integer(q)):max(as.integer(q))]) 
487
+    df[["Quality"]] <- q
488
+    df <- df[order(df$Id, df$Quality),]
489
+    df[["Proportion"]] <-
490
+        with(df, unlist(Map("/",
491
+                            lapply(split(Count, Id), cumsum),
492
+                            lapply(split(Count, Id), sum)),
493
+                        use.names=FALSE))
494
+    col <- colorRampPalette(brewer.pal(9, "RdYlBu"))(length(levels(q)))
495
+    plt <- dotplot(Id ~ Proportion, group=Quality, df,
496
+                   type = "b", pch = 20, col = col,
497
+                   xlab="Cummulative Proportion",
498
+                   key = list(space = "top",
499
+                     lines = list(col = col, size=3L), 
500
+                     text = list(lab = levels(df[["Quality"]])),
501
+                     columns = 10L, cex=.6))
502
+    list(QUALITY_SCORE_COUNT=.html_img(dest, "qualityCalls", plt))
503
+})
504
+
505
+setMethod(report, "QAReadQuality",
506
+    function(x, ..., dest=tempfile(), type="html")
507
+{
508
+    df <- as(values(x), "data.frame")
509
+    xmin <- min(df$Score)
510
+    ymax <- max(df$Density)
511
+    plt <- xyplot(Density ~ Score | Id, df,
512
+                  type = "l", xlab = "Average (calibrated) base quality", 
513
+                  ylab = "Proportion of reads", aspect = 2,
514
+                  panel = function(..., subscripts) {
515
+                      panel.xyplot(...)
516
+                      lbl <- as.character(unique(df$Id[subscripts]))
517
+                      ltext(xmin, ymax, lbl, adj = c(0, 1))
518
+                  }, strip = FALSE)
519
+    list(READ_QUALITY_FIGURE=.html_img(dest, "readQuality", plt))
520
+})
521
+
522
+setMethod(report, "QASequenceUse",
523
+          function(x, ..., dest=tempfile(), type="html")
524
+{
525
+    
526
+    df <- with(values(x), {
527
+        nOccur <- tapply(Occurrences, Id, c)
528
+        cumulative <- tapply(Occurrences * Reads, Id, function(elt) {
529
+            cs <- cumsum(elt)
530
+            (cs - cs[1] + 1)/(diff(range(cs)) + 1L)
531
+        })
532
+        id <- tapply(Id, Id, c)
533
+        data.frame(Occurrences = unlist(nOccur),
534
+                   Cumulative = unlist(cumulative), 
535
+                   Id = unlist(id), row.names = NULL)
536
+    })
537
+    xmax <- log10(max(df$Occurrences))
538
+    plt <- xyplot(Cumulative ~ log10(Occurrences) | factor(Id), df,
539
+           xlab = expression(paste("Number of occurrences of each sequence (",
540
+               log[10], ")", sep = "")),
541
+           ylab = "Cumulative proportion of reads", 
542
+           aspect = 2, panel = function(x, y, ..., subscripts, type) {
543
+               lbl <- unique(df$Id[subscripts])
544
+               ltext(xmax, 0.05, lbl, adj = c(1, 0))
545
+               type <- if (1L == length(x)) "p" else "l"
546
+               panel.xyplot(x, y, ..., type = type)
547
+           }, strip = FALSE)
548
+    list(SEQUENCE_USE=.html_img(dest, "sequenceUse", plt))
549
+})
550
+
551
+setMethod(report, "QAFrequentSequence",
552
+          function(x, ..., dest=tempfile(), type="html")
553
+{
554
+    thresholdLabel <- if (is.finite(x@n)) "n" else "k"
555
+    threshold <- as.character(if (is.finite(x@n)) x@n else x@k)
556
+    freqseq <- if (x@reportSequences) {
557
+        seqdf <- lapply(with(values(x), { #with() gets wrong env for .hwrite
558
+            lapply(split(TopCount, Id), function(elt) {
559
+                data.frame(Sequence=names(elt[[1]]),
560
+                           Count=unname(elt[[1]]))
561
+            })
562
+        }), .hwrite)
563
+        paste(Map(function(id, seq) {
564
+            sprintf("<p>Id: %s</p>%s", id, seq)
565
+        }, names(seqdf), seqdf), collapse="\n")
566
+    } else ""
567
+
568
+    df <- values(x)[, c("Id", "Count")]
569
+    list(THRESHOLD_LABEL=thresholdLabel, THRESHOLD=threshold,
570
+         FREQUENT_SEQUENCE_COUNT=.hwrite(df),
571
+         FREQUENT_SEQUENCES=freqseq)
572
+})
573
+
574
+setMethod(report, "QANucleotideByCycle",
575
+          function(x, ..., dest=tempfile(), type="html")
576
+{
577
+    df <- as(values(x), "data.frame")
578
+    df <- df[df$Base != "N" & df$Base != "-", ]
579
+    df$Base <- factor(df$Base)
580
+    xmax <- max(df$Cycle)
581
+    ymax <- log10(max(df$Count))
582
+    plt <-
583
+        xyplot(log10(Count) ~ as.integer(Cycle) | Id,
584
+               group = factor(Base),
585
+               df[order(df$Id, df$Base, df$Cycle),],
586
+               panel = function(..., subscripts) {
587
+                   lbl <- as.character(unique(df$Id[subscripts]))
588
+                   ltext(xmax, ymax, lbl, adj = c(1, 1))
589
+                   panel.xyplot(..., subscripts = subscripts)
590
+               },
591
+               type = "l", col = .dnaCol[1:4],
592
+               key = list(
593
+                 space = "top", lines = list(col = .dnaCol[1:4]),
594
+                 text = list(lab = levels(df$Base)),
595
+                 columns = length(levels(df$Base))),
596
+               xlab = "Cycle", aspect = 2, strip = FALSE)
597
+    list(CYCLE_BASE_CALL=.html_img(dest, "cycleBaseCall", plt))
598
+})
599
+
600
+setMethod(report, "QAQualityByCycle",
601
+          function(x, ..., dest=tempfile(), type="html")
602
+{
603
+    calc_means <- function(x, y, z)
604
+        rowsum(y * z, x)/rowsum(z, x)
605
+    calc_quantile <- function(x, y, z, q = c(0.25, 0.5, 0.75))
606
+        by(list(y, z), x, function(x) {
607
+            scoreRle <- Rle(x[[1]], x[[2]])
608
+            quantile(scoreRle, q)
609
+        })
610
+    df <- as(values(x), "data.frame")
611
+    Id <- df$Id
612
+    pal <- c("#66C2A5", "#FC8D62")
613
+    lvlPal <- c("#F5F5F5", "black")
614
+    rng <- range(df$Count)
615
+    at <- seq(rng[1], rng[2], length.out = 512)
616
+    np <- length(unique(Id))
617
+    nrow <- ceiling(np/4)
618
+    layout <- c(ceiling(np/nrow), nrow)
619
+    ymin <- min(df$Score)
620
+    plt <- xyplot(Score ~ Cycle | Id, df,
621
+           panel = function(x, y, ..., subscripts) {
622
+               z <- df$Count[subscripts]
623
+               mean <- calc_means(x, y, z)
624
+               qtiles <- calc_quantile(x, y, z)
625
+               sxi <- sort(unique(x))
626
+               panel.levelplot(x, y, z, subscripts = TRUE, at = at, 
627
+                               col.regions = colorRampPalette(lvlPal))
628
+               llines(sxi, mean, type = "l", col = pal[[1]], lwd = 1)
629
+               llines(sxi, sapply(qtiles, "[[", 1), type = "l",
630
+                      col = pal[[2]], lwd = 1, lty = 3)
631
+               llines(sxi, sapply(qtiles, "[[", 2), type = "l",
632
+                      col = pal[[2]], lwd = 1)
633
+               llines(sxi, sapply(qtiles, "[[", 3), type = "l",
634
+                      col = pal[[2]], lwd = 1, lty = 3)
635
+               lbl <- as.character(unique(df$Id[subscripts]))
636
+               ltext(1, ymin, lbl, adj = c(0, 0))
637
+           }, ylab = "Quality Score", layout = layout, strip = FALSE)
638
+    list(CYCLE_QUALITY=.html_img(dest, "cycleQualityCall", plt))
639
+})
640
+
641
+setMethod(report, "QA", function(x, ..., dest=tempfile(), type="html") {
642
+    if (any(type != "html"))
643
+        .throw(SRError("UserArgumentMismatch", "'type' must be 'html'"))
644
+    dir.create(dest, recursive=TRUE)
645
+
646
+    sections <- c(system.file("template", "QAHeader.html",
647
+                              package="ShortRead", mustWork=TRUE),
648
+                  x@sources@html, x@filtered@html,
649
+                  sapply(x, slot, "html"),
650
+                  system.file("template", "QAFooter.html",
651
+                              package="ShortRead", mustWork=TRUE))
652
+
653
+    values0 <- c(list(report(x@sources, dest=dest),
654
+                      report(x@filtered, dest=dest)),
655
+                 lapply(x, report, dest=dest))
656
+    values <- setNames(unlist(values0, recursive=FALSE, use.names=FALSE),
657
+                       unlist(lapply(values0, names)))
658
+
659
+    .report_html_do(dest, sections, values, ...)
660
+})