############################################################################# ############################################################################# ### ### AnnDbPkg-maker.R file ### ############################################################################# ############################################################################# ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### The "AnnDbPkgSeed" class. ### setClass( "AnnDbPkgSeed", representation( Package="character", # e.g. "hgu133a2.db" Title="character", Version="character", # e.g. "0.0.99" License="character", Author="character", Maintainer="character", PkgTemplate="character", # e.g. "HUMANCHIP.DB" DBschema="character", # e.g. "HUMANCHIP_DB" AnnObjPrefix="character", # e.g. "hgu133a2" AnnObjTarget="character", # e.g. "chip hgu133a2" organism="character", # e.g. "Homo sapiens" species="character", # e.g. "Human" manufacturer="character", # e.g. "Affymetrix" chipName="character", # e.g. "Human Genome U133A 2.0 Array" manufacturerUrl="character", # e.g. "http://www.affymetrix.com/support/technical/byproduct.affx?product=hgu133-20" biocViews="character" ), prototype( Title=as.character(NA), License="Artistic-2.0", Author="Marc Carlson", Maintainer="Bioconductor Package Maintainer <maintainer@bioconductor.org>", DBschema=as.character(NA), AnnObjPrefix=as.character(NA), AnnObjTarget=as.character(NA), organism=as.character(NA), species=as.character(NA), manufacturer=as.character(NA), chipName=as.character(NA), manufacturerUrl=as.character(NA), biocViews=as.character(NA) ) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Some helper functions. ### initWithDbMetada <- function(x, dbfile) { metadata2slot <- c( DBSCHEMA="DBschema", ORGANISM="organism", SPECIES="species", MANUFACTURER="manufacturer", CHIPNAME="chipName", MANUFACTURERURL="manufacturerUrl" ) dbconn <- dbFileConnect(dbfile) on.exit(dbFileDisconnect(dbconn)) metadata <- AnnotationDbi:::dbGetTable(dbconn, "metadata") if (!identical(colnames(metadata), c("name", "value"))) stop("\"metadata\" table has unexpected col names") if (any(duplicated(metadata$name))) { stop("col \"name\" in \"metadata\" table has duplicated values\n", " (this would never happen if \"name\" was defined as a PRIMARY KEY!)") } row.names(metadata) <- metadata$name for (i in seq_len(length(metadata2slot))) { metadata_name <- names(metadata2slot)[i] if (!(metadata_name %in% row.names(metadata))) { if (metadata_name == "DBSCHEMA") stop("'DBSCHEMA' not found in \"metadata\" table") next } slot_name <- metadata2slot[i] val <- metadata[metadata_name, "value"] if (is.na(slot(x, slot_name))) { slot(x, slot_name) <- val next } if (slot(x, slot_name) != val) stop(metadata_name, " specified in '", dbfile, "' (\"", val, "\") ", "doesn't match 'x@", slot_name, "' (\"", slot(x, slot_name), "\")") } if (is.na(x@manufacturerUrl)) { x@manufacturerUrl <- "" warning("no manufacturerUrl for package ", x@Package) } x } initComputedSlots <- function(x) { if (is.na(x@AnnObjPrefix)) stop("'AnnObjPrefix' slot must be set for package ", x@Package) ## Automatic default for "AnnObjTarget" slot if (is.na(x@AnnObjTarget)) x@AnnObjTarget <- paste("chip", x@AnnObjPrefix) ## Automatic default for "Title" slot if (is.na(x@Title)) { if (is.na(x@manufacturer) || is.na(x@chipName) || is.na(x@AnnObjTarget)) { warning("not enough information to set the 'Title' slot for package ", x@Package) } else { x@Title <- paste(x@manufacturer, " ", x@chipName, " annotation data (", x@AnnObjTarget, ")", sep="") } } ## Automatic default for "biocViews" slot if (is.na(x@biocViews) && !is.na(x@organism) && !is.na(x@manufacturer)) { chip_view <- paste(x@manufacturer, "Chip", sep="") org_view <- chartr(" ", "_", x@organism) x@biocViews <- paste("AnnotationData", chip_view, org_view, x@AnnObjPrefix, sep=", ") } x } initWithDbDoc <- function(dbfile) { dbconn <- dbFileConnect(dbfile) on.exit(dbFileDisconnect(dbconn)) map_metadata <- AnnotationDbi:::dbGetTable(dbconn, "map_metadata") map_metadata } getSymbolValuesForManPages <- function(map_names, dbfile) { map_metadata <- initWithDbDoc(dbfile) map_source <- sapply(map_names, function(this_map) { map_index <- which(map_metadata$map_name == this_map) if (length(map_index) > 0) { this_source <- paste( map_metadata[map_index, "source_name"], " \n ", map_metadata[map_index, "source_url"], " \n With a date stamp from the source of:", map_metadata[map_index, "source_date"], sep=" ", collapse=" and ") } else { this_source <- NA } this_source }) map_source <- gsub("_", "\\_", map_source, fixed=TRUE) names(map_source) <- paste(map_names, "SOURCE", sep="") map_source } removeCommentsFromFile <- function(infile, outfile) { if (!is.character(infile) || length(infile) != 1 || is.na(infile)) stop("'infile' must be a character string naming a file") if (!is.character(outfile) || length(outfile) != 1 || is.na(outfile)) stop("'outfile' must be a character string naming a file") if (file.exists(outfile)) stop("file '", outfile, "' already exists") infile <- file(infile, "r") #on.exit(close(infile)) outfile <- file(outfile, "w") #on.exit(close(outfile)) # doesn't seem to work while (TRUE) { text <- readLines(infile, n=1) if (length(text) == 0) break if (substr(text, 1, 1) != "#") writeLines(text, outfile) } close(outfile) close(infile) } loadAnnDbPkgIndex <- function(file) { if (missing(file)) { file <- system.file("extdata", "GentlemanLab", "ANNDBPKG-INDEX.TXT", package="AnnotationForge") } else { if (!is.character(file) || length(file) != 1 || is.na(file)) stop("'file' must be a character string naming a file") } tmp_file <- file.path(tempdir(), paste(basename(file), "tmp", sep=".")) removeCommentsFromFile(file, tmp_file) index <- read.dcf(tmp_file) file.remove(tmp_file) index } ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Helpers for filtering out innapropriate manual pages from a template. ### ## This function takes the seed and lists the Mappings listMappings <- function(x, type){ ## get seeds ## seeds <- eval(parse(text=paste("AnnotationDbi:::",x@DBschema, ## "_AnnDbBimap_seeds", sep=""))) schema <- x@DBschema ## schema will be like "HUMANCHIP_DB" or "HUMAN_DB" if(type=="ChipDb"){ orgDbName <- AnnotationDbi:::getOrgPkgForSchema(schema) allSeeds <- AnnotationDbi:::NCBICHIP_DB_SeedGenerator(orgDbName) }else if(type=="OrgDb"){ allSeeds <- AnnotationDbi:::NCBIORG_DB_SeedGenerator() } seeds <- AnnotationDbi:::.filterSeeds(allSeeds, schema, type) ## Then get the names unlist(lapply(seeds, function(x){return(x$objName)})) } ## This function will translate the from the actual mappings to the requisite ## man pages. The whole point is to get rid of things like flybase from ## humans etc. ## ALSO problematic: CHRLENGTHS.Rd, GO2ALLEGS.Rd UCSCGENES.Rd filterManPages <- function(doc_template_names, maps, x){ docs <- sub("\\.Rd$", "", doc_template_names) docs <- docs[docs %in% maps] ## Add things that will always be needed but are not themselves really bimaps docs <- c(docs, "_dbconn" ,"BASE","ORGANISM","MAPCOUNTS") if(!any(c("ECOLI_DB","XENOPUS_DB","ECOLICHIP_DB","XENOPUSCHIP_DB","PIG_DB" ,"PIGCHIP_DB") %in% x@DBschema)){ docs <- c(docs, "CHRLENGTHS") } paste(docs, ".Rd", sep="") } ## And I need a wrapper function to help me filter out things that are not in ## the manList when I call createPackage. .createAnnotPackage <-function(pkgname,destinationDir,originDir,symbolValues, manList, unlink=FALSE, quiet=FALSE){ tdir <- file.path("TEMPANNOTPACKAGEDIRFORFILTERING") dir.create(tdir) # tdir <- file.path(tempdir()) ## tempdir() causes strange errors... :( file.copy(from = dir(originDir, full.names = TRUE), to = tdir, recursive = TRUE) ## Then unlink unwanted man pages from tdir manDir <- file.path(tdir, "man") manFiles <- dir(manDir) rmFiles <- manFiles[!(manFiles %in% manList)] rmFiles <- file.path(manDir, rmFiles) unlink(rmFiles) ## Then call createPackage createPackage(pkgname=pkgname, destinationDir=destinationDir, originDir=tdir, symbolValues=symbolValues, unlink=unlink, quiet=quiet) ## Then remove our terrible temp dir unlink(tdir, recursive=TRUE) ## Will need to return to tempdir() if we ever want to be able to do more ## than one at a time... :( } ## TESTING: ## library(AnnotationForge) ## debug(AnnotationForge:::.createAnnotPackage) ## debug(AnnotationForge:::.makeAnnDbPkg) ## this one is always called. ## debug(AnnotationForge:::.makeAnnDbPkgs) ## This one is called 1st for mine ## debug(AnnotationForge:::.makeAnnDbPkgList) ## called for others ## source("~/proj/Rpacks/AnnotationForge/inst/extdata/GentlemanLab/org-batch-script.R") ## source("~/proj/Rpacks/AnnotationForge/inst/extdata/GentlemanLab/chip-batch-script.R") ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### The "makeAnnDbPkg" new generic. ### setGeneric("makeAnnDbPkg", signature="x", function(x, dbfile, dest_dir=".", no.man=FALSE, ...) standardGeneric("makeAnnDbPkg") ) ## helper to extract metadata .getOrgDepFromMetadata <- function(dbfile){ con <- dbConnect(SQLite(), dbfile) sqliteQuickSQL(con, "SELECT value FROM metadata WHERE name = 'ORGPKGDEP'") } .makeAnnDbPkg <- function(x, dbfile, dest_dir=".", no.man=FALSE, ...){ x <- initWithDbMetada(x, dbfile) x <- initComputedSlots(x) dbfile_basename <- basename(dbfile) if (dbfile_basename != paste(x@AnnObjPrefix, ".sqlite", sep="")) stop("'", dbfile, "': File name doesn't match 'x@AnnObjPrefix' (", x@AnnObjPrefix, ")") if (!grepl("^/", x@PkgTemplate)[1]) { ##TODO: this regex seems hacky? template_path <- system.file("AnnDbPkg-templates", x@PkgTemplate, package="AnnotationForge") } else { template_path <- x@PkgTemplate } ann_dbi_version <- installed.packages()['AnnotationDbi','Version'] ## only define 'org_version' if we are making a chipDb package. ## Otherwise it will only cause trouble. con1 <- dbConnect(dbDriver("SQLite"), dbfile) type <- dbGetQuery(con1, "SELECT value FROM metadata WHERE name='Db type'") if(type=="ChipDb"){ org_version <- installed.packages()['org.Hs.eg.db','Version'] ## NOCHIPSCHEMA DBs know who they depend on if(x@DBschema=="NOCHIPSCHEMA_DB"){ org_pkg <- as.character(.getOrgDepFromMetadata(dbfile)) }else{ org_pkg <- paste0(AnnotationDbi:::getOrgPkgForSchema(x@DBschema),".db") } }else{ org_version <- "no org version date" org_pkg <- "no org pkg required" } symvals <- list( DBSCHEMA=x@DBschema, PKGTITLE=x@Title, ANNOBJPREFIX=x@AnnObjPrefix, ANNOBJTARGET=x@AnnObjTarget, ORGANISM=x@organism, SPECIES=x@species, MANUF=x@manufacturer, CHIPNAME=x@chipName, MANUFURL=x@manufacturerUrl, AUTHOR=x@Author, MAINTAINER=x@Maintainer, PKGVERSION=x@Version, LIC=x@License, BIOCVIEWS=x@biocViews, DBFILE=dbfile_basename, ANNDBIVERSION=ann_dbi_version, ORGVERSION=org_version, ORGPKGDEP=org_pkg ) man_dir <- file.path(template_path, "man") if (file.exists(man_dir)) { if (!no.man) { doc_template_names <- list.files(man_dir, "\\.Rd$") #is_static <- doc_template_names %in% c("_dbconn.Rd", "_dbfile.Rd") #doc_template_names <- doc_template_names[!is_static] ## Do this only if your schema is an NCBI* one. if(grepl("NCBI",x@PkgTemplate)){ ## extract the map_names from the bimap definitions map_names <- listMappings(x, type) ## now use this info to filter to relevant mappings doc_template_names <- filterManPages(doc_template_names, maps=map_names,x) }else{## if old school, just use the man pages in template map_names <- sub("\\.Rd$", "", doc_template_names) } if (length(map_names) != 0) symvals <- c(symvals, getSymbolValuesForManPages(map_names, dbfile)) } else { unlink(man_dir, recursive=TRUE) } } if (any(duplicated(names(symvals)))) { str(symvals) stop("'symvals' contains duplicated symbols (see above)") } .createAnnotPackage(x@Package, destinationDir=dest_dir, originDir=template_path, symbolValues=symvals, manList=doc_template_names) ## rename Rd files (prepend the pkg name) ## Here is also where we put the man files into the package (after renaming them) if (file.exists(man_dir) && !no.man && length(doc_template_names) != 0) { doc_path <- file.path(dest_dir, x@Package, "man") from_doc_names <- paste(doc_path, doc_template_names, sep=.Platform$file.sep) to_doc_names <- paste(x@AnnObjPrefix, doc_template_names, sep="") to_doc_names <- paste(doc_path, to_doc_names, sep=.Platform$file.sep) mapply(file.rename, from_doc_names, to_doc_names) } dest_db_dir <- file.path(dest_dir, x@Package, "inst", "extdata") if (!file.exists(dest_db_dir) && !dir.create(dest_db_dir, recursive=TRUE)) stop("unable to create dest db dir ", dest_db_dir) dest_dbfile <- file.path(dest_db_dir, dbfile_basename) if (!file.copy(dbfile, dest_dbfile)) stop("cannot copy file '", dbfile, "' to '", dest_dbfile, "'") if(.Platform$OS.type != 'windows'){ command <- paste("chmod 444", dest_dbfile) if (system(command) != 0) warning(command, " failed") } return(invisible(TRUE)) } setMethod("makeAnnDbPkg", "AnnDbPkgSeed", function(x, dbfile, dest_dir=".", no.man=FALSE, ...){ .makeAnnDbPkg(x, dbfile, dest_dir=dest_dir, no.man=no.man, ...) } ) .makeAnnDbPkgList <- function(x, dbfile, dest_dir=".", no.man=FALSE, ...){ x$Class <- "AnnDbPkgSeed" y <- do.call(new, x) makeAnnDbPkg(y, dbfile, dest_dir, no.man) } setMethod("makeAnnDbPkg", "list", function(x, dbfile, dest_dir=".", no.man=FALSE, ...){ .makeAnnDbPkgList(x, dbfile, dest_dir=dest_dir, no.man=no.man, ...) } ) ### 'x' can be a regular expression. ### Typical use: ### > library(AnnotationDbi) ### > makeAnnDbPkg(c("hgu95av2.db", "hgu133a2.db")) ### or to make all the packages: ### > makeAnnDbPkg(".*") # a character vector of length 1 is treated as a ### # regular expression ### .makeAnnDbPkgs <- function(x, dbfile, dest_dir=".", no.man=FALSE, ...){ if (missing(dbfile)) { dbfile <- system.file("extdata", "GentlemanLab", "ANNDBPKG-INDEX.TXT", package="AnnotationForge") } index <- loadAnnDbPkgIndex(dbfile) if (length(x) != 1) { ii <- match(x, index[ , "Package"]) if (any(is.na(ii))) stop("packages ", paste(x[is.na(ii)], collapse=", "), " not in ", dbfile) index <- index[ii, , drop=FALSE] } else if (!is.na(x) && x != "") { pkgname <- paste("^", x, "$", sep="") ii <- grep(pkgname, index[ , "Package"]) index <- index[ii, , drop=FALSE] } filter <- list(...) for (j in seq_len(length(filter))) { colname <- names(filter)[j] if (!(colname %in% colnames(index))) stop("unknown field '", colname, "'") colvals <- filter[[j]] if (!is.character(colvals)) stop("extra arg values must be of type character") index <- index[index[ , colname] %in% colvals, , drop=FALSE] } pkgnames_in1string <- paste(index[, "Package"], collapse=", ") cat(nrow(index), " package(s) to make: ", pkgnames_in1string, "\n", sep="") for (i in seq_len(nrow(index))) { y <- index[i, ] y <- as.list(y[!is.na(y)]) cat("[", i, "/", nrow(index), "] making package ", y[["Package"]], ": ", sep="") dbfile <- y[["DBfile"]] y <- y[names(y) != "DBfile"] makeAnnDbPkg(y, dbfile, dest_dir, no.man) } cat("DONE (", nrow(index), " package(s) made under the ", dest_dir, " directory)\n", sep="") } setMethod("makeAnnDbPkg", "character", function(x, dbfile, dest_dir=".", no.man=FALSE, ...){ .makeAnnDbPkgs(x, dbfile, dest_dir=dest_dir, no.man=no.man, ...) } )