R/GmapGenome-class.R
b22faf0d
 ### =========================================================================
 ### GmapGenome class
 ### -------------------------------------------------------------------------
 ###
 ### Database of reference sequence used by the GMAP suite.
 ###
 
 setClass("GmapGenome", representation(name = "character",
                                       directory = "GmapGenomeDirectory"))
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Accessors
 ###
 
 directory <- function(x) {
   retVal <- NULL
   if (!is.null(x)) {
     retVal <- x@directory ## 'dir' is already taken
   }
   return(retVal)
 }
 
 setMethod("genome", "GmapGenome", function(x) x@name)
 
 setMethod("path", "GmapGenome",
           function(object) file.path(path(directory(object)), genome(object)))
 
 mapsDirectory <- function(x) {
   file.path(path(x), paste(genome(x), "maps", sep = "."))
 }
 
 setMethod("seqinfo", "GmapGenome", function(x) {
d157a850
   if (!.gmapGenomeCreated(x))
     stop("GmapGenome index '", genome(x), "' does not exist")
6bf0a27e
   suppressWarnings({ # warning when colClasses is too long, even when fill=TRUE!
     tab <- read.table(.get_genome(path(directory(x)), genome(x),
                                   chromosomes = TRUE),
                       colClasses = c("character", "NULL", "integer",
                         "character"),
                       fill = TRUE)
   })
e0faa99c
   Seqinfo(tab[,1], tab[,2], nzchar(tab[,3]), genome = genome(x))
b22faf0d
 })
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Constructor
 ###
 
 setGeneric("genomeName", function(x) standardGeneric("genomeName"))
 
 setMethod("genomeName", "character", function(x) x)
 setMethod("genomeName", "BSgenome", function(x) providerVersion(x))
2332d266
 setMethods("genomeName", list("RTLFile", "RsamtoolsFile"),
            function(x) file_path_sans_ext(basename(path(x)), TRUE))
0771cfdc
 setMethod("genomeName", "ANY", function(x) {
     if (hasMethod("seqinfo", class(x))) {
         ans <- unique(genome(x))
         if (length(ans) > 1L) {
             stop("genome is ambiguous")
         }
         ans
     } else {
         stop("cannot derive a genome name")
     }
 })
b22faf0d
 
66e08db9
 file_path_is_absolute <- function(x) {
   ## hack that is unlikely to work on e.g. Windows
   identical(substring(x, 1, 1), .Platform$file.sep)
 }
 
 file_path_is_dir <- function(x) {
   isTRUE(file.info(x)[,"isdir"])
 }
 
b22faf0d
 GmapGenome <- function(genome,
                        directory = GmapGenomeDirectory(create = create), 
                        name = genomeName(genome), create = FALSE, ...)
 {
   if (!isTRUEorFALSE(create))
     stop("'create' must be TRUE or FALSE")
66e08db9
   if (isSingleString(genome) && file_path_is_dir(genome)) {
     genome <- path.expand(genome)
     if (file_path_is_absolute(genome)) {
       if (!missing(directory))
         stop("'directory' should be missing when 'genome' is an absolute path")
       directory <- dirname(genome)
       genome <- basename(genome)
     }
   }
b22faf0d
   if (isSingleString(directory))
     directory <- GmapGenomeDirectory(directory, create = create)
   if (is(genome, "DNAStringSet")) {
     if (missing(name))
       stop("If the genome argument is a DNAStringSet object",
fa2ff4e6
            "the name argument must be provided")
     if (is.null(names(genome))) {
       stop("If the genome is provided as a DNAStringSet, ",
            "the genome needs to have names. ",
            "E.g., \"names(genome) <- someSeqNames")
     }
b22faf0d
   }
   if (!isSingleString(name))
     stop("'name' must be a single, non-NA string")
   if (!is(directory, "GmapGenomeDirectory"))
     stop("'directory' must be a GmapGenomeDirectory object or path to one")
   db <- new("GmapGenome", name = name, directory = directory)
   if (create) {
     if (name %in% genome(directory))
       message("NOTE: genome '", name, "' already exists, not overwriting")
     else referenceSequence(db, ...) <- genome
   }
   db
 }
 
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Building
 ###
 
 setReplaceMethod("referenceSequence",
                  signature(x = "GmapGenome", value = "ANY"),
                  function(x, name, ..., value)
                  {
7545fca9
                    gmap_build(value, x, ...)
b22faf0d
                  })
 
997c9d1a
 setGeneric("snps<-", function(x, name, ..., value) standardGeneric("snps<-"))
b22faf0d
 
 setReplaceMethod("snps", c("GmapGenome", "ANY"),
9c8da223
                  function(x, name, ..., value) {
b22faf0d
                    snpDir <- GmapSnpDirectory(x)
                    snps(snpDir, name = name, genome = x,
                         iitPath = mapsDirectory(x), ...) <- value
                    x
                  })
 
 setGeneric("spliceSites<-",
            function(x, ..., value) standardGeneric("spliceSites<-"))
 
5fdf6426
 setReplaceMethod("spliceSites", c("GmapGenome", "GRangesList"),
b22faf0d
                  function(x, name, value) {
5fdf6426
                    exonsFlat <- unlist(value, use.names=FALSE)
6dc12e0e
                    exonsPart <- PartitioningByWidth(value)
b22faf0d
                    exonsHead <- exonsFlat[-end(exonsPart)]
5fdf6426
                    donors <- flank(exonsHead, 1L, start = FALSE)
b22faf0d
                    exonsTail <- exonsFlat[-start(exonsPart)]
5fdf6426
                    acceptors <- flank(exonsTail, 1L, start = TRUE)
                    sites <- c(resize(donors, 2L, fix = "end"),
                               resize(acceptors, 2L, fix = "start"))
b22faf0d
                    names(sites) <- values(sites)$exon_id
                    info <- rep(c("donor", "acceptor"), each = length(donors))
f08d5803
                    intronWidths <- abs(start(acceptors) - start(donors)) + 1L
5fdf6426
                    info <- paste(info, intronWidths)
b22faf0d
                    values(sites) <- DataFrame(info)
                    iit_store(sites, file.path(mapsDirectory(x), name))
                    x
                  })
 
c78be696
 #setReplaceMethod("spliceSites", c("GmapGenome", "TranscriptDb"),
c0cce6c7
 setReplaceMethod("spliceSites", c("GmapGenome", "TxDb"),
5fdf6426
                  function(x, name, value) {
                    spliceSites(x, name) <- exonsBy(value)
                    x
                  })
 
5db28c49
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Sequence access
 ###
 
8edb187c
 setMethod("getSeq", "GmapGenome", function(x, which = seqinfo(x)) {
b19add7c
   if (!.gmapGenomeCreated(x))
4f2f27e4
       stop("Genome index does not exist")
   if (is.character(which)) {
       which <- seqinfo(x)[which]
   }
5db28c49
   which <- as(which, "GRanges")
53f63871
   merge(seqinfo(x), seqinfo(which)) # for the checks
2f697caa
   ans <- .Call(R_Genome_getSeq, path(directory(x)), genome(x),
                as.character(seqnames(which)), start(which), width(which),
                as.character(strand(which)))
609d2fce
   if (!is.null(names(which)))
     names(ans) <- names(which)
2f697caa
   ans
5db28c49
 })
 
77a739af
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Coerce
 ###
 
 setAs("ANY", "GmapGenome", function(from) GmapGenome(from))
 
2f697caa
 setAs("GmapGenome", "DNAStringSet", function(from) {
   DNAStringSet(getSeq(from))
 })
 
b22faf0d
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Show
 ###
 
 setMethod("show", "GmapGenome", function(object) {
   cat("GmapGenome object\ngenome:", genome(object), "\ndirectory:",
       path(directory(object)), "\n")
 })
 
f18863b8
 setMethod("showAsCell", "GmapGenome", function(object) genome(object))
 
b22faf0d
 ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 ### Utilities
 ###
 
 .normArgDb <- function(db, dir) {
   if (!is(db, "GmapGenome"))
     db <- GmapGenome(db, dir)
   db
 }
 
b19add7c
 
 .gmapGenomeCreated <- function(genome) {
   ##existance means the GENOME_NAME.chromosome exists
   
   d <- path(directory(genome))
   if (!file.exists(d))
     return(FALSE)
 
   chromosome.file <- paste(genome(genome), "chromosome", sep=".")
   possibleLoc1 <- file.path(d, chromosome.file)
   possibleLoc2 <- file.path(d, genome(genome), chromosome.file)
   if (!(file.exists(possibleLoc1) || file.exists(possibleLoc2)))
     return(FALSE)
 
   return(TRUE)
 }