Browse code

general cleanup and fixes, doc updates

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@111183 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 03/12/2015 21:11:09
Showing 13 changed files

... ...
@@ -9,7 +9,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read
9 9
         methods to work with GMAP and GSNAP from within R. In addition,
10 10
         it provides methods to tally alignment results on a
11 11
         per-nucleotide basis using the bam_tally tool.
12
-Version: 1.13.5
12
+Version: 1.13.6
13 13
 Depends: R (>= 2.15.0), methods, GenomeInfoDb (>= 1.1.3),
14 14
         GenomicRanges (>= 1.17.12)
15 15
 Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.31.2),
... ...
@@ -17,12 +17,13 @@ importFrom(Biostrings, getSeq, readDNAStringSet, DNAStringSet)
17 17
 importMethodsFrom(GenomicRanges, seqnames, strand)
18 18
 importMethodsFrom(Rsamtools, asBam)
19 19
 importClassesFrom(GenomicFeatures, TxDb)
20
-importFrom(GenomicFeatures, transcripts, exons)
20
+importFrom(GenomicFeatures, transcripts, exons, exonsBy)
21 21
 importClassesFrom(rtracklayer, RTLFile, FastaFile, RTLFileList)
22
-importFrom(rtracklayer, "referenceSequence<-", import, export, FastaFile)
22
+importFrom(rtracklayer, "referenceSequence<-", import, export, FastaFile,
23
+           FileForFormat)
23 24
 importMethodsFrom(rtracklayer, export)
24 25
 importFrom(VariantAnnotation, readVcf, ScanVcfParam, fixed, VRanges, ref, alt,
25
-           "ref<-", "alt<-", altDepth, refDepth)
26
+           "ref<-", "alt<-", altDepth, refDepth, "vcfWhich<-")
26 27
 importClassesFrom(VariantAnnotation, "VCF", "VRanges")
27 28
 importFrom(BSgenome, getSeq, providerVersion)
28 29
 importFrom(GenomicAlignments, readGAlignments)
... ...
@@ -73,7 +73,7 @@ setMethod("normArgCdsIIT", "GmapGenome", function(exon_iit, genome, BPPARAM) {
73 73
         stop("No map matching the pattern 'genes.iit' found for this GmapGenome")
74 74
     if(length(exon_iit) > 1)
75 75
         stop("Multipe map matching the pattern 'genes.iit' found for this GmapGenome")
76
-    normArgsCdsIIT(exon_iit)
76
+    normArgCdsIIT(exon_iit)
77 77
 })
78 78
 
79 79
 .makeCdsIIT <- function(exons,
... ...
@@ -203,7 +203,7 @@ setAs("GsnapParam", "list", function(from) {
203 203
           to
204 204
       })
205 205
 
206
-as.list.GmapAlignerParam <- function(x) as(x, "list")
206
+as.list.GmapAlignerParam <- function(x, ...) as(x, "list")
207 207
 
208 208
 setAs("ANY", "characterORNULL", function(from) {
209 209
   if (is.null(from))
... ...
@@ -38,7 +38,7 @@ GeneGenome <- function(gene) {
38 38
 exonsToGene <- range
39 39
 
40 40
 getExons <- function(txdb, orgdb, gene) {
41
-  eg <- select(orgdb, gene, "ENTREZID", "SYMBOL")$ENTREZID
41
+  eg <- AnnotationDbi::select(orgdb, gene, "ENTREZID", "SYMBOL")$ENTREZID
42 42
   exons(txdb, vals = list(gene_id=eg))
43 43
 }
44 44
 
... ...
@@ -21,7 +21,7 @@ setMethod("get_genome", "GmapGenome",
21 21
               snpsdir <- directory(snps)
22 22
             }
23 23
             tmpfile <- file.path(tempdir(), "get_genome.fasta")
24
-            .get_genome(path(directory(db)), genome(db), snpsdir = snpsdir,
24
+            .get_genome(path(directory(x)), genome(x), snpsdir = snpsdir,
25 25
                         usesnps = usesnps, snpformat = "2", ...,
26 26
                         .range = range, .redirect = tmpfile)
27 27
             readDNAStringSet(tmpfile)
... ...
@@ -7,7 +7,8 @@
7 7
 ### High-level wrapper
8 8
 ###
9 9
 
10
-setGeneric("iit_store", function(x, dest, BPPARAM=MulticoreParam(1), ...) standardGeneric("iit_store"))
10
+setGeneric("iit_store", function(x, dest, BPPARAM=MulticoreParam(1), ...)
11
+    standardGeneric("iit_store"))
11 12
 
12 13
 gmapRange <- function(x) {
13 14
   pos <- strand(x) == "+"
... ...
@@ -28,7 +29,8 @@ gmapRange <- function(x) {
28 29
     
29 30
     strtpos = if(pos) min(strts) else max(ends)
30 31
     endpos = if(!pos) min(strts) else max(ends)
31
-    hdr = paste0(">", name, " ", runValue(seqnames(grange))[1], ":", strtpos, "..", endpos )
32
+    hdr = paste0(">", name, " ", runValue(seqnames(grange))[1], ":", strtpos,
33
+        "..", endpos)
32 34
 
33 35
     datline = paste(name, name, "NA")
34 36
     rngst = if(pos) strts[o] else ends[o]
... ...
@@ -69,7 +71,7 @@ setMethod("iit_store", c("character"),
69 71
                    label = if (gff) "ID" else NULL)
70 72
           {
71 73
             .iit_store(gff = gff, label = label, sort = "none",
72
-                       output = dest, inputfile = x)
74
+                       output = dest, .inputfile = x)
73 75
           })
74 76
 
75 77
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
... ...
@@ -1,48 +1 @@
1
-.test <- function(dir, pattern = "^test_.*\\.R$")
2
-{
3
-    .failure_details <- function(result) {
4
-        res <- result[[1L]]
5
-        if (res$nFail > 0 || res$nErr > 0) {
6
-            Filter(function(x) length(x) > 0,
7
-                   lapply(res$sourceFileResults,
8
-                          function(fileRes) {
9
-                              names(Filter(function(x) x$kind != "success",
10
-                                           fileRes))
11
-                          }))
12
-        } else list()
13
-    }
14
-
15
-    if (missing(dir)) {
16
-        dir <- system.file("unitTests", package="gmapR")
17
-        if (!length(dir)) {
18
-            dir <- system.file("UnitTests", package="gmapR")
19
-            if (!length(dir))
20
-                stop("unable to find unit tests, no 'unitTests' dir")
21
-        }
22
-    }
23
-    require("RUnit", quietly=TRUE) || stop("RUnit package not found")
24
-    RUnit_opts <- getOption("RUnit", list())
25
-    RUnit_opts$verbose <- 0L
26
-    RUnit_opts$silent <- TRUE
27
-    RUnit_opts$verbose_fail_msg <- TRUE
28
-    options(RUnit = RUnit_opts)
29
-    suite <- defineTestSuite(name="gmapR RUnit Tests", dirs=dir,
30
-                             testFileRegexp=pattern,
31
-                             rngKind="default",
32
-                             rngNormalKind="default")
33
-    result <- runTestSuite(suite)
34
-    cat("\n\n")
35
-    printTextProtocol(result, showDetails=FALSE)
36
-    if (length(details <- .failure_details(result)) >0) {
37
-        cat("\nTest files with failing tests\n")
38
-        for (i in seq_along(details)) {
39
-            cat("\n  ", basename(names(details)[[i]]), "\n")
40
-            for (j in seq_along(details[[i]])) {
41
-                cat("    ", details[[i]][[j]], "\n")
42
-            }
43
-        }
44
-        cat("\n\n")
45
-        stop("unit tests failed for package gmapR")
46
-    }
47
-    result
48
-}
1
+.test <- function() BiocGenerics:::testPackage("gmapR")
... ...
@@ -10,7 +10,9 @@ test_BamTallyParam <- function() {
10 10
                primary_only = FALSE, ignore_duplicates = FALSE,
11 11
                min_depth = 0L, variant_strand = 0L,
12 12
                ignore_query_Ns = FALSE,
13
-               indels = FALSE, include_soft_clips = 0L)
13
+               indels = FALSE, include_soft_clips = 0L,
14
+               xs = FALSE, read_pos = FALSE, min_base_quality = 0L,
15
+               noncovered = FALSE, nm = FALSE)
14 16
   which <- TP53Which()
15 17
   wicked.which <- renameSeqlevels(which, c(TP53 = "chr1"))
16 18
   
... ...
@@ -49,6 +49,7 @@ test_GmapGenome_accessors <- function() {
49 49
   if (file.exists(genomeDir)) unlink(genomeDir, recursive=TRUE)
50 50
   dir.create(genomeDir, recursive=TRUE)
51 51
   on.exit(unlink(genomeDir, recursive=TRUE))
52
+  genomeDir <- normalizePath(genomeDir)
52 53
   gmapGenome <- GmapGenome(genome=dna, directory=genomeDir,
53 54
                            name=genomeName, create=FALSE, k=12)
54 55
   checkIdentical(path(gmapGenome), file.path(genomeDir, genomeName))
... ...
@@ -10,16 +10,5 @@ test_GmapGenomeDirectory <- function() {
10 10
   
11 11
   ##test methods
12 12
   checkIdentical(genome(ggd), character(0))
13
-
14
-  ##for compatibility with Macs
15
-  .unSymLink <- function(d) {
16
-    pieces <- unlist(strsplit(d, "/"))
17
-    unSymed <- Sys.readlink(file.path(pieces[1], pieces[2]))
18
-    if (unSymed != "") {
19
-      pieces[2] <- sub("^/", "", unSymed)
20
-      d <- paste(pieces, collapse="/")
21
-    }
22
-    return(d)
23
-  }
24
-  checkIdentical(.unSymLink(path(ggd)), .unSymLink(genomeDir))
13
+  checkIdentical(path(ggd), normalizePath(genomeDir))
25 14
 }
... ...
@@ -22,7 +22,9 @@
22 22
                 min_depth = 0L, variant_strand = 0L,
23 23
                 ignore_query_Ns = FALSE,
24 24
                 indels = FALSE, include_soft_clips = 0L,
25
-                exon_iit = NULL, IIT_BPPARAM = NULL)
25
+                exon_iit = NULL, IIT_BPPARAM = NULL,
26
+                xs = FALSE, read_pos = FALSE,
27
+                min_base_quality = 0L, noncovered = FALSE, nm = FALSE)
26 28
 }
27 29
 \arguments{
28 30
   \item{genome}{A \code{GmapGenome} object, or something coercible to one.}
... ...
@@ -74,6 +76,20 @@
74 76
     generating the iit file from an R object. Ignored if \code{exon_iit}
75 77
     is a character vector or \code{NULL}
76 78
   }
79
+  \item{xs}{Whether to tabulate reads by XS tag, the aligner's best
80
+    guess about the strand of transcription.
81
+  }
82
+  \item{read_pos}{Whether to tabulate by read position.
83
+  }
84
+  \item{min_base_quality}{Minimum base quality cutoff. Calls of lower
85
+    quality are not counted, except in the total raw depth.
86
+  }
87
+  \item{noncovered}{
88
+    Whether to report zero tallies, where there is no coverage.
89
+  }
90
+  \item{nm}{
91
+    Whether to tally by NM tag, the number of mismatches for a read.
92
+  }
77 93
 }
78 94
 \seealso{
79 95
   \code{\link{bam_tally}}
... ...
@@ -19,8 +19,9 @@
19 19
 \usage{
20 20
 \S4method{bam_tally}{BamFile}(x, param, ...)
21 21
 \S4method{bam_tally}{character}(x, param, ...)
22
-variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
23
-               keep_ref_rows = FALSE, read_length = NA_integer_)
22
+variantSummary(x, read_pos_breaks = NULL,
23
+               keep_ref_rows = FALSE, read_length = NA_integer_,
24
+               high_nm_score = NA_integer_)
24 25
 }
25 26
 
26 27
 \arguments{
... ...
@@ -30,8 +31,6 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
30 31
   \item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}}
31 32
     for aggregating the per-read position counts. If \code{NULL}, no per-cycle
32 33
     counts are returned.}
33
-  \item{high_base_quality}{The minimum mapping quality for a
34
-    read to be counted as high quality.}
35 34
   \item{keep_ref_rows}{Whether to keep the rows describing only the
36 35
     reference calls, i.e., where ref and alt are the same. These are
37 36
     useful when one needs the reference counts even when there are no
... ...
@@ -39,6 +38,7 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
39 38
   \item{read_length}{The expected read length. If the read length is NA,
40 39
     the MDFNE (median distance from nearest end) statistic will NOT be
41 40
     calculated.}
41
+  \item{high_nm_score}{The value at which an NM value is considered high.}
42 42
   \item{...}{Arguments that override settings in \code{param}.}
43 43
 }
44 44
 
... ...
@@ -55,15 +55,8 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
55 55
   columns are also present:
56 56
   \item{n.read.pos}{The number of unique read positions for the alt allele.}
57 57
   \item{n.read.pos.ref}{The number of unique read positions for the ref allele.}
58
-  \item{raw.count}{The number of reads with the alternate allele,
59
-    \code{NA} for the reference allele row.}
60
-  \item{raw.count.ref}{The number of reads with the reference allele.}
61 58
   \item{raw.count.total}{The total number of reads at that position,
62 59
     including reference and all alternates.}
63
-  \item{mean.quality}{The mean base quality for the alt allele,
64
-    truncated at \code{high_base_quality}.}
65
-  \item{mean.quality.ref}{The mean base quality for the ref allele,
66
-    truncated at \code{high_base_quality}.}
67 60
   \item{count.plus}{The number of positive strand reads for the alternate
68 61
     allele, \code{NA} for the reference allele row.}
69 62
   \item{count.plus.ref}{The number of positive strand reads for the reference
... ...
@@ -72,12 +65,28 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L,
72 65
     allele, \code{NA} for the reference allele row.}
73 66
   \item{count.minus.ref}{The number of negative strand reads for the reference
74 67
     allele.}
68
+  \item{count.del.plus}{The plus strand deletion count over the
69
+    position.}
70
+  \item{count.del.minus}{The minus strand deletion count over the
71
+    position.}
75 72
   \item{read.pos.mean}{Mean read position for the alt allele.}
76 73
   \item{read.pos.mean.ref}{Mean read position for the ref allele.}
77 74
   \item{read.pos.var}{Variance in the read positions for the alt allele.}
78 75
   \item{read.pos.var.ref}{Variance in the read positions for the ref allele.}
79 76
   \item{mdfne}{Median distance from nearest end for the alt allele.}
80 77
   \item{mdfne.ref}{Median distance from nearest end for the ref allele.}
78
+  \item{count.high.nm}{The number of alt reads with an NM value at or above the
79
+    \code{high_nm_score} cutoff.}
80
+  \item{count.high.nm.ref}{The number of ref reads with an NM value at
81
+    or above the \code{high_nm_score} cutoff.}
82
+  
83
+  If codon counting was enabled, there will be a column giving the codon
84
+  strand: \code{codon.strand}.
85
+  
86
+  If the \code{xs} parameter was \code{TRUE}, there will be four
87
+  additional columns giving the counts by aligner-determined
88
+  strand: \code{count.xs.plus}, \code{count.xs.plus.ref},
89
+  \code{count.xs.minus}, and \code{count.xs.minus.ref}.
81 90
   
82 91
   An additional column is present for each bin formed by
83 92
   the \code{read_pos_breaks} parameter, with the read count for that bin.