git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@74875 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -136,6 +136,17 @@ setReplaceMethod("spliceSites", c("GmapGenome", "TranscriptDb"), |
136 | 136 |
x |
137 | 137 |
}) |
138 | 138 |
|
139 |
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
|
140 |
+### Sequence access |
|
141 |
+### |
|
142 |
+ |
|
143 |
+setMethod("getSeq", "GmapGenome", function(x, which) { |
|
144 |
+ which <- as(which, "GRanges") |
|
145 |
+ .Call(R_Genome_getSeq, path(directory(x)), genome(x), |
|
146 |
+ as.character(seqnames(which)), start(which), width(which), |
|
147 |
+ as.character(strand(which))) |
|
148 |
+}) |
|
149 |
+ |
|
139 | 150 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
140 | 151 |
### Coerce |
141 | 152 |
### |
... | ... |
@@ -93,3 +93,25 @@ test_GmapGenome_spliceSites_replacement <- function() { |
93 | 93 |
test_if_GmapGenome_dir_does_not_exist <- function() { |
94 | 94 |
checkException(GmapGenome(genome="NoGenome", directory = file.path(tempdir(), "DoesNotExist"))) |
95 | 95 |
} |
96 |
+ |
|
97 |
+test_GmapGenome_getSeq <- function() { |
|
98 |
+ genomeName <- "testGenome" |
|
99 |
+ dna <- Biostrings::DNAStringSet(c(testA = "ACTGTGTCAGTTCATGGGACCGTTGC", |
|
100 |
+ testB = "CAACAAATCCGGG")) |
|
101 |
+ genomeDir <- tempfile() |
|
102 |
+ if (file.exists(genomeDir)) |
|
103 |
+ unlink(genomeDir, recursive=TRUE) |
|
104 |
+ dir.create(genomeDir, recursive=TRUE) |
|
105 |
+ on.exit(unlink(genomeDir, recursive=TRUE)) |
|
106 |
+ genome <- GmapGenome(genome=dna, directory=genomeDir, |
|
107 |
+ name=genomeName, create=TRUE, k=12) |
|
108 |
+ |
|
109 |
+ gr <- GRanges(rep(c("testA", "testB"), 2), |
|
110 |
+ IRanges(c(1, 5, 5, 11), c(1, 10, 7, 10)), |
|
111 |
+ c("+", "-", "*", "+")) |
|
112 |
+ seqs <- getSeq(genome, gr) |
|
113 |
+ checkIdentical(seqs, c("A", "GGATTT", "TGT", "")) |
|
114 |
+ |
|
115 |
+ seqs <- getSeq(genome, GRanges()) |
|
116 |
+ checkIdentical(seqs, character()) |
|
117 |
+} |
... | ... |
@@ -5,6 +5,7 @@ |
5 | 5 |
\alias{path,GmapGenome-method} |
6 | 6 |
\alias{genome,GmapGenome-method} |
7 | 7 |
\alias{seqinfo,GmapGenome-method} |
8 |
+\alias{getSeq,GmapGenome-method} |
|
8 | 9 |
\alias{GmapGenome} |
9 | 10 |
\alias{snps<-,GmapGenome,ANY,ANY-method} |
10 | 11 |
\alias{spliceSites<-,GmapGenome,GRangesList-method} |
... | ... |
@@ -55,6 +56,14 @@ |
55 | 56 |
} |
56 | 57 |
} |
57 | 58 |
|
59 |
+\section{Extracting Genomic Sequence}{ |
|
60 |
+ \item{}{\code{getSeq(x, which)}: Extracts the genomic sequence for |
|
61 |
+ each region in \code{which} (something coercible to |
|
62 |
+ \code{GRanges}). The result is a character vector for now. This is |
|
63 |
+ implemented in C and is very efficient. |
|
64 |
+ } |
|
65 |
+} |
|
66 |
+ |
|
58 | 67 |
\section{Accessors}{ |
59 | 68 |
\describe{ |
60 | 69 |
\item{}{\code{path(object)}: returns the path to the directory |