git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@111183 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -9,7 +9,7 @@ Description: GSNAP and GMAP are a pair of tools to align short-read |
9 | 9 |
methods to work with GMAP and GSNAP from within R. In addition, |
10 | 10 |
it provides methods to tally alignment results on a |
11 | 11 |
per-nucleotide basis using the bam_tally tool. |
12 |
-Version: 1.13.5 |
|
12 |
+Version: 1.13.6 |
|
13 | 13 |
Depends: R (>= 2.15.0), methods, GenomeInfoDb (>= 1.1.3), |
14 | 14 |
GenomicRanges (>= 1.17.12) |
15 | 15 |
Imports: S4Vectors, IRanges, Rsamtools (>= 1.17.8), rtracklayer (>= 1.31.2), |
... | ... |
@@ -17,12 +17,13 @@ importFrom(Biostrings, getSeq, readDNAStringSet, DNAStringSet) |
17 | 17 |
importMethodsFrom(GenomicRanges, seqnames, strand) |
18 | 18 |
importMethodsFrom(Rsamtools, asBam) |
19 | 19 |
importClassesFrom(GenomicFeatures, TxDb) |
20 |
-importFrom(GenomicFeatures, transcripts, exons) |
|
20 |
+importFrom(GenomicFeatures, transcripts, exons, exonsBy) |
|
21 | 21 |
importClassesFrom(rtracklayer, RTLFile, FastaFile, RTLFileList) |
22 |
-importFrom(rtracklayer, "referenceSequence<-", import, export, FastaFile) |
|
22 |
+importFrom(rtracklayer, "referenceSequence<-", import, export, FastaFile, |
|
23 |
+ FileForFormat) |
|
23 | 24 |
importMethodsFrom(rtracklayer, export) |
24 | 25 |
importFrom(VariantAnnotation, readVcf, ScanVcfParam, fixed, VRanges, ref, alt, |
25 |
- "ref<-", "alt<-", altDepth, refDepth) |
|
26 |
+ "ref<-", "alt<-", altDepth, refDepth, "vcfWhich<-") |
|
26 | 27 |
importClassesFrom(VariantAnnotation, "VCF", "VRanges") |
27 | 28 |
importFrom(BSgenome, getSeq, providerVersion) |
28 | 29 |
importFrom(GenomicAlignments, readGAlignments) |
... | ... |
@@ -73,7 +73,7 @@ setMethod("normArgCdsIIT", "GmapGenome", function(exon_iit, genome, BPPARAM) { |
73 | 73 |
stop("No map matching the pattern 'genes.iit' found for this GmapGenome") |
74 | 74 |
if(length(exon_iit) > 1) |
75 | 75 |
stop("Multipe map matching the pattern 'genes.iit' found for this GmapGenome") |
76 |
- normArgsCdsIIT(exon_iit) |
|
76 |
+ normArgCdsIIT(exon_iit) |
|
77 | 77 |
}) |
78 | 78 |
|
79 | 79 |
.makeCdsIIT <- function(exons, |
... | ... |
@@ -203,7 +203,7 @@ setAs("GsnapParam", "list", function(from) { |
203 | 203 |
to |
204 | 204 |
}) |
205 | 205 |
|
206 |
-as.list.GmapAlignerParam <- function(x) as(x, "list") |
|
206 |
+as.list.GmapAlignerParam <- function(x, ...) as(x, "list") |
|
207 | 207 |
|
208 | 208 |
setAs("ANY", "characterORNULL", function(from) { |
209 | 209 |
if (is.null(from)) |
... | ... |
@@ -38,7 +38,7 @@ GeneGenome <- function(gene) { |
38 | 38 |
exonsToGene <- range |
39 | 39 |
|
40 | 40 |
getExons <- function(txdb, orgdb, gene) { |
41 |
- eg <- select(orgdb, gene, "ENTREZID", "SYMBOL")$ENTREZID |
|
41 |
+ eg <- AnnotationDbi::select(orgdb, gene, "ENTREZID", "SYMBOL")$ENTREZID |
|
42 | 42 |
exons(txdb, vals = list(gene_id=eg)) |
43 | 43 |
} |
44 | 44 |
|
... | ... |
@@ -21,7 +21,7 @@ setMethod("get_genome", "GmapGenome", |
21 | 21 |
snpsdir <- directory(snps) |
22 | 22 |
} |
23 | 23 |
tmpfile <- file.path(tempdir(), "get_genome.fasta") |
24 |
- .get_genome(path(directory(db)), genome(db), snpsdir = snpsdir, |
|
24 |
+ .get_genome(path(directory(x)), genome(x), snpsdir = snpsdir, |
|
25 | 25 |
usesnps = usesnps, snpformat = "2", ..., |
26 | 26 |
.range = range, .redirect = tmpfile) |
27 | 27 |
readDNAStringSet(tmpfile) |
... | ... |
@@ -7,7 +7,8 @@ |
7 | 7 |
### High-level wrapper |
8 | 8 |
### |
9 | 9 |
|
10 |
-setGeneric("iit_store", function(x, dest, BPPARAM=MulticoreParam(1), ...) standardGeneric("iit_store")) |
|
10 |
+setGeneric("iit_store", function(x, dest, BPPARAM=MulticoreParam(1), ...) |
|
11 |
+ standardGeneric("iit_store")) |
|
11 | 12 |
|
12 | 13 |
gmapRange <- function(x) { |
13 | 14 |
pos <- strand(x) == "+" |
... | ... |
@@ -28,7 +29,8 @@ gmapRange <- function(x) { |
28 | 29 |
|
29 | 30 |
strtpos = if(pos) min(strts) else max(ends) |
30 | 31 |
endpos = if(!pos) min(strts) else max(ends) |
31 |
- hdr = paste0(">", name, " ", runValue(seqnames(grange))[1], ":", strtpos, "..", endpos ) |
|
32 |
+ hdr = paste0(">", name, " ", runValue(seqnames(grange))[1], ":", strtpos, |
|
33 |
+ "..", endpos) |
|
32 | 34 |
|
33 | 35 |
datline = paste(name, name, "NA") |
34 | 36 |
rngst = if(pos) strts[o] else ends[o] |
... | ... |
@@ -69,7 +71,7 @@ setMethod("iit_store", c("character"), |
69 | 71 |
label = if (gff) "ID" else NULL) |
70 | 72 |
{ |
71 | 73 |
.iit_store(gff = gff, label = label, sort = "none", |
72 |
- output = dest, inputfile = x) |
|
74 |
+ output = dest, .inputfile = x) |
|
73 | 75 |
}) |
74 | 76 |
|
75 | 77 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
... | ... |
@@ -1,48 +1 @@ |
1 |
-.test <- function(dir, pattern = "^test_.*\\.R$") |
|
2 |
-{ |
|
3 |
- .failure_details <- function(result) { |
|
4 |
- res <- result[[1L]] |
|
5 |
- if (res$nFail > 0 || res$nErr > 0) { |
|
6 |
- Filter(function(x) length(x) > 0, |
|
7 |
- lapply(res$sourceFileResults, |
|
8 |
- function(fileRes) { |
|
9 |
- names(Filter(function(x) x$kind != "success", |
|
10 |
- fileRes)) |
|
11 |
- })) |
|
12 |
- } else list() |
|
13 |
- } |
|
14 |
- |
|
15 |
- if (missing(dir)) { |
|
16 |
- dir <- system.file("unitTests", package="gmapR") |
|
17 |
- if (!length(dir)) { |
|
18 |
- dir <- system.file("UnitTests", package="gmapR") |
|
19 |
- if (!length(dir)) |
|
20 |
- stop("unable to find unit tests, no 'unitTests' dir") |
|
21 |
- } |
|
22 |
- } |
|
23 |
- require("RUnit", quietly=TRUE) || stop("RUnit package not found") |
|
24 |
- RUnit_opts <- getOption("RUnit", list()) |
|
25 |
- RUnit_opts$verbose <- 0L |
|
26 |
- RUnit_opts$silent <- TRUE |
|
27 |
- RUnit_opts$verbose_fail_msg <- TRUE |
|
28 |
- options(RUnit = RUnit_opts) |
|
29 |
- suite <- defineTestSuite(name="gmapR RUnit Tests", dirs=dir, |
|
30 |
- testFileRegexp=pattern, |
|
31 |
- rngKind="default", |
|
32 |
- rngNormalKind="default") |
|
33 |
- result <- runTestSuite(suite) |
|
34 |
- cat("\n\n") |
|
35 |
- printTextProtocol(result, showDetails=FALSE) |
|
36 |
- if (length(details <- .failure_details(result)) >0) { |
|
37 |
- cat("\nTest files with failing tests\n") |
|
38 |
- for (i in seq_along(details)) { |
|
39 |
- cat("\n ", basename(names(details)[[i]]), "\n") |
|
40 |
- for (j in seq_along(details[[i]])) { |
|
41 |
- cat(" ", details[[i]][[j]], "\n") |
|
42 |
- } |
|
43 |
- } |
|
44 |
- cat("\n\n") |
|
45 |
- stop("unit tests failed for package gmapR") |
|
46 |
- } |
|
47 |
- result |
|
48 |
-} |
|
1 |
+.test <- function() BiocGenerics:::testPackage("gmapR") |
... | ... |
@@ -10,7 +10,9 @@ test_BamTallyParam <- function() { |
10 | 10 |
primary_only = FALSE, ignore_duplicates = FALSE, |
11 | 11 |
min_depth = 0L, variant_strand = 0L, |
12 | 12 |
ignore_query_Ns = FALSE, |
13 |
- indels = FALSE, include_soft_clips = 0L) |
|
13 |
+ indels = FALSE, include_soft_clips = 0L, |
|
14 |
+ xs = FALSE, read_pos = FALSE, min_base_quality = 0L, |
|
15 |
+ noncovered = FALSE, nm = FALSE) |
|
14 | 16 |
which <- TP53Which() |
15 | 17 |
wicked.which <- renameSeqlevels(which, c(TP53 = "chr1")) |
16 | 18 |
|
... | ... |
@@ -49,6 +49,7 @@ test_GmapGenome_accessors <- function() { |
49 | 49 |
if (file.exists(genomeDir)) unlink(genomeDir, recursive=TRUE) |
50 | 50 |
dir.create(genomeDir, recursive=TRUE) |
51 | 51 |
on.exit(unlink(genomeDir, recursive=TRUE)) |
52 |
+ genomeDir <- normalizePath(genomeDir) |
|
52 | 53 |
gmapGenome <- GmapGenome(genome=dna, directory=genomeDir, |
53 | 54 |
name=genomeName, create=FALSE, k=12) |
54 | 55 |
checkIdentical(path(gmapGenome), file.path(genomeDir, genomeName)) |
... | ... |
@@ -10,16 +10,5 @@ test_GmapGenomeDirectory <- function() { |
10 | 10 |
|
11 | 11 |
##test methods |
12 | 12 |
checkIdentical(genome(ggd), character(0)) |
13 |
- |
|
14 |
- ##for compatibility with Macs |
|
15 |
- .unSymLink <- function(d) { |
|
16 |
- pieces <- unlist(strsplit(d, "/")) |
|
17 |
- unSymed <- Sys.readlink(file.path(pieces[1], pieces[2])) |
|
18 |
- if (unSymed != "") { |
|
19 |
- pieces[2] <- sub("^/", "", unSymed) |
|
20 |
- d <- paste(pieces, collapse="/") |
|
21 |
- } |
|
22 |
- return(d) |
|
23 |
- } |
|
24 |
- checkIdentical(.unSymLink(path(ggd)), .unSymLink(genomeDir)) |
|
13 |
+ checkIdentical(path(ggd), normalizePath(genomeDir)) |
|
25 | 14 |
} |
... | ... |
@@ -22,7 +22,9 @@ |
22 | 22 |
min_depth = 0L, variant_strand = 0L, |
23 | 23 |
ignore_query_Ns = FALSE, |
24 | 24 |
indels = FALSE, include_soft_clips = 0L, |
25 |
- exon_iit = NULL, IIT_BPPARAM = NULL) |
|
25 |
+ exon_iit = NULL, IIT_BPPARAM = NULL, |
|
26 |
+ xs = FALSE, read_pos = FALSE, |
|
27 |
+ min_base_quality = 0L, noncovered = FALSE, nm = FALSE) |
|
26 | 28 |
} |
27 | 29 |
\arguments{ |
28 | 30 |
\item{genome}{A \code{GmapGenome} object, or something coercible to one.} |
... | ... |
@@ -74,6 +76,20 @@ |
74 | 76 |
generating the iit file from an R object. Ignored if \code{exon_iit} |
75 | 77 |
is a character vector or \code{NULL} |
76 | 78 |
} |
79 |
+ \item{xs}{Whether to tabulate reads by XS tag, the aligner's best |
|
80 |
+ guess about the strand of transcription. |
|
81 |
+ } |
|
82 |
+ \item{read_pos}{Whether to tabulate by read position. |
|
83 |
+ } |
|
84 |
+ \item{min_base_quality}{Minimum base quality cutoff. Calls of lower |
|
85 |
+ quality are not counted, except in the total raw depth. |
|
86 |
+ } |
|
87 |
+ \item{noncovered}{ |
|
88 |
+ Whether to report zero tallies, where there is no coverage. |
|
89 |
+ } |
|
90 |
+ \item{nm}{ |
|
91 |
+ Whether to tally by NM tag, the number of mismatches for a read. |
|
92 |
+ } |
|
77 | 93 |
} |
78 | 94 |
\seealso{ |
79 | 95 |
\code{\link{bam_tally}} |
... | ... |
@@ -19,8 +19,9 @@ |
19 | 19 |
\usage{ |
20 | 20 |
\S4method{bam_tally}{BamFile}(x, param, ...) |
21 | 21 |
\S4method{bam_tally}{character}(x, param, ...) |
22 |
-variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L, |
|
23 |
- keep_ref_rows = FALSE, read_length = NA_integer_) |
|
22 |
+variantSummary(x, read_pos_breaks = NULL, |
|
23 |
+ keep_ref_rows = FALSE, read_length = NA_integer_, |
|
24 |
+ high_nm_score = NA_integer_) |
|
24 | 25 |
} |
25 | 26 |
|
26 | 27 |
\arguments{ |
... | ... |
@@ -30,8 +31,6 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L, |
30 | 31 |
\item{read_pos_breaks}{The breaks, like those passed to \code{\link{cut}} |
31 | 32 |
for aggregating the per-read position counts. If \code{NULL}, no per-cycle |
32 | 33 |
counts are returned.} |
33 |
- \item{high_base_quality}{The minimum mapping quality for a |
|
34 |
- read to be counted as high quality.} |
|
35 | 34 |
\item{keep_ref_rows}{Whether to keep the rows describing only the |
36 | 35 |
reference calls, i.e., where ref and alt are the same. These are |
37 | 36 |
useful when one needs the reference counts even when there are no |
... | ... |
@@ -39,6 +38,7 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L, |
39 | 38 |
\item{read_length}{The expected read length. If the read length is NA, |
40 | 39 |
the MDFNE (median distance from nearest end) statistic will NOT be |
41 | 40 |
calculated.} |
41 |
+ \item{high_nm_score}{The value at which an NM value is considered high.} |
|
42 | 42 |
\item{...}{Arguments that override settings in \code{param}.} |
43 | 43 |
} |
44 | 44 |
|
... | ... |
@@ -55,15 +55,8 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L, |
55 | 55 |
columns are also present: |
56 | 56 |
\item{n.read.pos}{The number of unique read positions for the alt allele.} |
57 | 57 |
\item{n.read.pos.ref}{The number of unique read positions for the ref allele.} |
58 |
- \item{raw.count}{The number of reads with the alternate allele, |
|
59 |
- \code{NA} for the reference allele row.} |
|
60 |
- \item{raw.count.ref}{The number of reads with the reference allele.} |
|
61 | 58 |
\item{raw.count.total}{The total number of reads at that position, |
62 | 59 |
including reference and all alternates.} |
63 |
- \item{mean.quality}{The mean base quality for the alt allele, |
|
64 |
- truncated at \code{high_base_quality}.} |
|
65 |
- \item{mean.quality.ref}{The mean base quality for the ref allele, |
|
66 |
- truncated at \code{high_base_quality}.} |
|
67 | 60 |
\item{count.plus}{The number of positive strand reads for the alternate |
68 | 61 |
allele, \code{NA} for the reference allele row.} |
69 | 62 |
\item{count.plus.ref}{The number of positive strand reads for the reference |
... | ... |
@@ -72,12 +65,28 @@ variantSummary(x, read_pos_breaks = NULL, high_base_quality = 0L, |
72 | 65 |
allele, \code{NA} for the reference allele row.} |
73 | 66 |
\item{count.minus.ref}{The number of negative strand reads for the reference |
74 | 67 |
allele.} |
68 |
+ \item{count.del.plus}{The plus strand deletion count over the |
|
69 |
+ position.} |
|
70 |
+ \item{count.del.minus}{The minus strand deletion count over the |
|
71 |
+ position.} |
|
75 | 72 |
\item{read.pos.mean}{Mean read position for the alt allele.} |
76 | 73 |
\item{read.pos.mean.ref}{Mean read position for the ref allele.} |
77 | 74 |
\item{read.pos.var}{Variance in the read positions for the alt allele.} |
78 | 75 |
\item{read.pos.var.ref}{Variance in the read positions for the ref allele.} |
79 | 76 |
\item{mdfne}{Median distance from nearest end for the alt allele.} |
80 | 77 |
\item{mdfne.ref}{Median distance from nearest end for the ref allele.} |
78 |
+ \item{count.high.nm}{The number of alt reads with an NM value at or above the |
|
79 |
+ \code{high_nm_score} cutoff.} |
|
80 |
+ \item{count.high.nm.ref}{The number of ref reads with an NM value at |
|
81 |
+ or above the \code{high_nm_score} cutoff.} |
|
82 |
+ |
|
83 |
+ If codon counting was enabled, there will be a column giving the codon |
|
84 |
+ strand: \code{codon.strand}. |
|
85 |
+ |
|
86 |
+ If the \code{xs} parameter was \code{TRUE}, there will be four |
|
87 |
+ additional columns giving the counts by aligner-determined |
|
88 |
+ strand: \code{count.xs.plus}, \code{count.xs.plus.ref}, |
|
89 |
+ \code{count.xs.minus}, and \code{count.xs.minus.ref}. |
|
81 | 90 |
|
82 | 91 |
An additional column is present for each bin formed by |
83 | 92 |
the \code{read_pos_breaks} parameter, with the read count for that bin. |