Browse code

Add getSeq,GmapGenome method

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@74875 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 26/03/2013 21:44:17
Showing 3 changed files

... ...
@@ -136,6 +136,17 @@ setReplaceMethod("spliceSites", c("GmapGenome", "TranscriptDb"),
136 136
                    x
137 137
                  })
138 138
 
139
+### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
140
+### Sequence access
141
+###
142
+
143
+setMethod("getSeq", "GmapGenome", function(x, which) {
144
+  which <- as(which, "GRanges")
145
+  .Call(R_Genome_getSeq, path(directory(x)), genome(x),
146
+        as.character(seqnames(which)), start(which), width(which),
147
+        as.character(strand(which)))
148
+})
149
+
139 150
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
140 151
 ### Coerce
141 152
 ###
... ...
@@ -93,3 +93,25 @@ test_GmapGenome_spliceSites_replacement <- function() {
93 93
 test_if_GmapGenome_dir_does_not_exist <- function() {
94 94
   checkException(GmapGenome(genome="NoGenome", directory = file.path(tempdir(), "DoesNotExist")))
95 95
 }
96
+
97
+test_GmapGenome_getSeq <- function() {
98
+  genomeName <- "testGenome"
99
+  dna <- Biostrings::DNAStringSet(c(testA = "ACTGTGTCAGTTCATGGGACCGTTGC",
100
+                                    testB = "CAACAAATCCGGG"))
101
+  genomeDir <- tempfile()
102
+  if (file.exists(genomeDir))
103
+    unlink(genomeDir, recursive=TRUE)
104
+  dir.create(genomeDir, recursive=TRUE)
105
+  on.exit(unlink(genomeDir, recursive=TRUE))
106
+  genome <- GmapGenome(genome=dna, directory=genomeDir,
107
+                       name=genomeName, create=TRUE, k=12)
108
+  
109
+  gr <- GRanges(rep(c("testA", "testB"), 2),
110
+                IRanges(c(1, 5, 5, 11), c(1, 10, 7, 10)),
111
+                c("+", "-", "*", "+"))
112
+  seqs <- getSeq(genome, gr)
113
+  checkIdentical(seqs, c("A", "GGATTT", "TGT", ""))
114
+
115
+  seqs <- getSeq(genome, GRanges())
116
+  checkIdentical(seqs, character())
117
+}
... ...
@@ -5,6 +5,7 @@
5 5
 \alias{path,GmapGenome-method}
6 6
 \alias{genome,GmapGenome-method}
7 7
 \alias{seqinfo,GmapGenome-method}
8
+\alias{getSeq,GmapGenome-method}
8 9
 \alias{GmapGenome}
9 10
 \alias{snps<-,GmapGenome,ANY,ANY-method}
10 11
 \alias{spliceSites<-,GmapGenome,GRangesList-method}
... ...
@@ -55,6 +56,14 @@
55 56
   }
56 57
 }
57 58
 
59
+\section{Extracting Genomic Sequence}{
60
+  \item{}{\code{getSeq(x, which)}: Extracts the genomic sequence for
61
+    each region in \code{which} (something coercible to
62
+    \code{GRanges}). The result is a character vector for now. This is
63
+    implemented in C and is very efficient.
64
+  }
65
+}
66
+
58 67
 \section{Accessors}{
59 68
   \describe{
60 69
     \item{}{\code{path(object)}: returns the path to the directory