b22faf0d |
### =========================================================================
### GmapGenome class
### -------------------------------------------------------------------------
###
### Database of reference sequence used by the GMAP suite.
###
setClass("GmapGenome", representation(name = "character",
directory = "GmapGenomeDirectory"))
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Accessors
###
directory <- function(x) {
retVal <- NULL
if (!is.null(x)) {
retVal <- x@directory ## 'dir' is already taken
}
return(retVal)
}
setMethod("genome", "GmapGenome", function(x) x@name)
setMethod("path", "GmapGenome",
function(object) file.path(path(directory(object)), genome(object)))
mapsDirectory <- function(x) {
file.path(path(x), paste(genome(x), "maps", sep = "."))
}
setMethod("seqinfo", "GmapGenome", function(x) {
|
d157a850 |
if (!.gmapGenomeCreated(x))
stop("GmapGenome index '", genome(x), "' does not exist")
|
6bf0a27e |
suppressWarnings({ # warning when colClasses is too long, even when fill=TRUE!
tab <- read.table(.get_genome(path(directory(x)), genome(x),
chromosomes = TRUE),
colClasses = c("character", "NULL", "integer",
"character"),
fill = TRUE)
})
|
e0faa99c |
Seqinfo(tab[,1], tab[,2], nzchar(tab[,3]), genome = genome(x))
|
b22faf0d |
})
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Constructor
###
setGeneric("genomeName", function(x) standardGeneric("genomeName"))
setMethod("genomeName", "character", function(x) x)
setMethod("genomeName", "BSgenome", function(x) providerVersion(x))
|
2332d266 |
setMethods("genomeName", list("RTLFile", "RsamtoolsFile"),
function(x) file_path_sans_ext(basename(path(x)), TRUE))
|
0771cfdc |
setMethod("genomeName", "ANY", function(x) {
if (hasMethod("seqinfo", class(x))) {
ans <- unique(genome(x))
if (length(ans) > 1L) {
stop("genome is ambiguous")
}
ans
} else {
stop("cannot derive a genome name")
}
})
|
b22faf0d |
|
66e08db9 |
file_path_is_absolute <- function(x) {
## hack that is unlikely to work on e.g. Windows
identical(substring(x, 1, 1), .Platform$file.sep)
}
file_path_is_dir <- function(x) {
isTRUE(file.info(x)[,"isdir"])
}
|
b22faf0d |
GmapGenome <- function(genome,
directory = GmapGenomeDirectory(create = create),
name = genomeName(genome), create = FALSE, ...)
{
if (!isTRUEorFALSE(create))
stop("'create' must be TRUE or FALSE")
|
66e08db9 |
if (isSingleString(genome) && file_path_is_dir(genome)) {
genome <- path.expand(genome)
if (file_path_is_absolute(genome)) {
if (!missing(directory))
stop("'directory' should be missing when 'genome' is an absolute path")
directory <- dirname(genome)
genome <- basename(genome)
}
}
|
b22faf0d |
if (isSingleString(directory))
directory <- GmapGenomeDirectory(directory, create = create)
if (is(genome, "DNAStringSet")) {
if (missing(name))
stop("If the genome argument is a DNAStringSet object",
|
fa2ff4e6 |
"the name argument must be provided")
if (is.null(names(genome))) {
stop("If the genome is provided as a DNAStringSet, ",
"the genome needs to have names. ",
"E.g., \"names(genome) <- someSeqNames")
}
|
b22faf0d |
}
if (!isSingleString(name))
stop("'name' must be a single, non-NA string")
if (!is(directory, "GmapGenomeDirectory"))
stop("'directory' must be a GmapGenomeDirectory object or path to one")
db <- new("GmapGenome", name = name, directory = directory)
if (create) {
if (name %in% genome(directory))
message("NOTE: genome '", name, "' already exists, not overwriting")
else referenceSequence(db, ...) <- genome
}
db
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Building
###
setReplaceMethod("referenceSequence",
signature(x = "GmapGenome", value = "ANY"),
function(x, name, ..., value)
{
|
7545fca9 |
gmap_build(value, x, ...)
|
b22faf0d |
})
|
997c9d1a |
setGeneric("snps<-", function(x, name, ..., value) standardGeneric("snps<-"))
|
b22faf0d |
setReplaceMethod("snps", c("GmapGenome", "ANY"),
|
9c8da223 |
function(x, name, ..., value) {
|
b22faf0d |
snpDir <- GmapSnpDirectory(x)
snps(snpDir, name = name, genome = x,
iitPath = mapsDirectory(x), ...) <- value
x
})
setGeneric("spliceSites<-",
function(x, ..., value) standardGeneric("spliceSites<-"))
|
5fdf6426 |
setReplaceMethod("spliceSites", c("GmapGenome", "GRangesList"),
|
b22faf0d |
function(x, name, value) {
|
5fdf6426 |
exonsFlat <- unlist(value, use.names=FALSE)
|
6dc12e0e |
exonsPart <- PartitioningByWidth(value)
|
b22faf0d |
exonsHead <- exonsFlat[-end(exonsPart)]
|
5fdf6426 |
donors <- flank(exonsHead, 1L, start = FALSE)
|
b22faf0d |
exonsTail <- exonsFlat[-start(exonsPart)]
|
5fdf6426 |
acceptors <- flank(exonsTail, 1L, start = TRUE)
sites <- c(resize(donors, 2L, fix = "end"),
resize(acceptors, 2L, fix = "start"))
|
b22faf0d |
names(sites) <- values(sites)$exon_id
info <- rep(c("donor", "acceptor"), each = length(donors))
|
f08d5803 |
intronWidths <- abs(start(acceptors) - start(donors)) + 1L
|
5fdf6426 |
info <- paste(info, intronWidths)
|
b22faf0d |
values(sites) <- DataFrame(info)
iit_store(sites, file.path(mapsDirectory(x), name))
x
})
|
c78be696 |
#setReplaceMethod("spliceSites", c("GmapGenome", "TranscriptDb"),
|
c0cce6c7 |
setReplaceMethod("spliceSites", c("GmapGenome", "TxDb"),
|
5fdf6426 |
function(x, name, value) {
spliceSites(x, name) <- exonsBy(value)
x
})
|
5db28c49 |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Sequence access
###
|
8edb187c |
setMethod("getSeq", "GmapGenome", function(x, which = seqinfo(x)) {
|
b19add7c |
if (!.gmapGenomeCreated(x))
|
4f2f27e4 |
stop("Genome index does not exist")
if (is.character(which)) {
which <- seqinfo(x)[which]
}
|
5db28c49 |
which <- as(which, "GRanges")
|
53f63871 |
merge(seqinfo(x), seqinfo(which)) # for the checks
|
2f697caa |
ans <- .Call(R_Genome_getSeq, path(directory(x)), genome(x),
as.character(seqnames(which)), start(which), width(which),
as.character(strand(which)))
|
609d2fce |
if (!is.null(names(which)))
names(ans) <- names(which)
|
2f697caa |
ans
|
5db28c49 |
})
|
77a739af |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Coerce
###
setAs("ANY", "GmapGenome", function(from) GmapGenome(from))
|
2f697caa |
setAs("GmapGenome", "DNAStringSet", function(from) {
DNAStringSet(getSeq(from))
})
|
b22faf0d |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Show
###
setMethod("show", "GmapGenome", function(object) {
cat("GmapGenome object\ngenome:", genome(object), "\ndirectory:",
path(directory(object)), "\n")
})
|
f18863b8 |
setMethod("showAsCell", "GmapGenome", function(object) genome(object))
|
b22faf0d |
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Utilities
###
.normArgDb <- function(db, dir) {
if (!is(db, "GmapGenome"))
db <- GmapGenome(db, dir)
db
}
|
b19add7c |
.gmapGenomeCreated <- function(genome) {
##existance means the GENOME_NAME.chromosome exists
d <- path(directory(genome))
if (!file.exists(d))
return(FALSE)
chromosome.file <- paste(genome(genome), "chromosome", sep=".")
possibleLoc1 <- file.path(d, chromosome.file)
possibleLoc2 <- file.path(d, genome(genome), chromosome.file)
if (!(file.exists(possibleLoc1) || file.exists(possibleLoc2)))
return(FALSE)
return(TRUE)
}
|