Bioconductor Code: gep2pep

Browse code

added organism option for importMSigDB.xml, added minsize and maxsize for computing KS statistic, changed rawmode datasets, added PEPs ranking function as a parameter, minor bug fixes

Ciccio authored on 19/01/2018 11:42:44
Showing 7 changed files

DESCRIPTION index fa24db3..bb94f50 100644
NAMESPACE index 474e671..24ac90b 100644
R/gep2pep.R index 0282da0..b00117c 100644
man/buildPEPs.Rd index 1d93d1a..ef94999 100644
tests/testthat.R index feb285b..3dd2e71 100644
tests/testthat/test_gep2pep.R index 8e642f0..8dbe823 100644
tests/testthat/test_rawMode.R index 0000000..d2f5368

History View file @ 9072fb9

@@ -1,7 +1,7 @@
                      Package: gep2pep
                      Type: Package
                      Title: Creation and Analysis of Pathway Expression Profiles (PEPs)
                     -Version: 0.99.2.3
                     +Version: 0.99.2.6
                      Date: 2017-09-11
                      Author: Francesco Napolitano <franapoli@gmail.com>
                      Maintainer: Francesco Napolitano <franapoli@gmail.com>

NAMESPACE

History View file @ 9072fb9

@@ -12,6 +12,7 @@ export(gene2pathways)
                      export(getCollections)
                      export(getDetails)
                      export(getResults)
                     +export(importFromRawMode)
                      export(importMSigDB.xml)
                      export(loadCollection)
                      export(loadESmatrix)

R/gep2pep.R

History View file @ 9072fb9

@@ -342,7 +342,7 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
                            say(paste0("Working on collection: ", dbi))
                     -      say(paste0("Creatiing a repository entry."))
                     +      say(paste0("Creating a repository entry."))
                            fl <- tempfile()
                            h5createFile(fl)
                            rp$put(fl, dbi, asattach=T, tags=c("pep", "#hdf5"))
@@ -352,13 +352,13 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
                            x <- readRDS(file.path(path, fname))$ES
                            Nrow <- nrow(x)
                            say(paste0("Creating 2 HDF5 dataset of size: ", Nrow, "x", Ncol))
                     -      ## h5createDataset(fl, "ES-PV", c(Nrow*2, Ncol), chunk=c(Nrow*2, Nchunk))
+                    +
                            h5createDataset(fl, "ES", c(Nrow, Ncol), chunk=c(Nrow, Nchunk))
                            h5createDataset(fl, "PV", c(Nrow, Ncol), chunk=c(Nrow, Nchunk))
                            h5createDataset(fl, "rownames", Nrow, storage.mode="character", size=256,
                                            chunk=Nrow)
                            h5createDataset(fl, "colnames", Ncol, storage.mode="character", size=256,
                     -                      chunk=Ncol)
                     +                      chunk=Nchunk)
                            h5write(rownames(x), fl, "rownames")
                            say("Adding chunks...")
@@ -370,8 +370,7 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
                                x <- readRDS(ifile)
                                ifsize <- utils:::format.object_size(file.size(ifile), "auto")
                                startCol <- (j-1)*Nchunk+1
                     -          ## h5write(rbind(x$ES, x$PV), fl, "ES-PV", start=c(1,startCol),
                     -          ##         createnewfile=F)
+                    +
                                h5write(x$ES, fl, "ES", start=c(1,startCol),
                                        createnewfile=F)
                                h5write(x$PV, fl, "PV", start=c(1,startCol),
@@ -429,12 +428,19 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
                      #' ## removing temporary repository
                      #' unlink(repo_path, TRUE)
                      #' @export
                     -importMSigDB.xml <- function(fname) {
                     +importMSigDB.xml <- function(fname, organism="Homo Sapiens") {
                          say("Loading gene sets...")
                          result = tryCatch({
                              sets <- getBroadSets(fname)
                     +        say(paste0("Loaded ", length(sets), " sets."))
                     +        orgs <- sapply(sets, function(s) attributes(s)$organism)
                     +        if(organism != "all") {
                     +            w <- tolower(orgs) == tolower(organism)
                     +            sets <- sets[w]
                     +            say(paste0("Selected ", length(sets), " sets for: ", organism))
                     +        }
                              say("Converting gene sets...")
                              as.CategorizedCollection(sets)
                          }, error = function(e) {
@@ -466,18 +472,22 @@ importMSigDB.xml <- function(fname) {
                              say("Converting gene sets...")
                              gs <- list()
                     +        k <- 1
                              for(i in seq_len(nrow(msigDB))) {
                     -            gs[[i]] <- GeneSet(strsplit(msigDB$set[i], ",")[[1]],
                     -                               shortDescription = msigDB$desc[i],
                     -                               longDescription = msigDB$desc_full[i],
                     -                               setName = msigDB$name[i],
                     -                               setIdentifier = msigDB$id[i],
                     -                               organism = msigDB$organism[i],
                     -                               collectionType = CategorizedCollection(
                     -                                   category=msigDB$category[i],
                     -                                   subCategory=msigDB$subcategory[i]
                     -                               ))
                     +            if(tolower(msigDB[[i]]$organism) == organism) {
                     +                gs[[i]] <- GeneSet(strsplit(msigDB$set[i], ",")[[1]],
                     +                                   shortDescription = msigDB$desc[i],
                     +                                   longDescription = msigDB$desc_full[i],
                     +                                   setName = msigDB$name[i],
                     +                                   setIdentifier = msigDB$id[i],
                     +                                   organism = msigDB$organism[i],
                     +                                   collectionType = CategorizedCollection(
                     +                                       category=msigDB$category[i],
                     +                                       subCategory=msigDB$subcategory[i]
                     +                                   ))
                     +                k <- k+1
+                                 }
                     +        }
                              GeneSetCollection(gs)
                          })
@@ -929,7 +939,9 @@ makeCollectionIDs <- function(sets) {
                          dbs <- sapply(sets, get, x="category")
                          subdbs <- sapply(sets, get, x="subcategory")
                     -    subdbs[subdbs==""] <- dbs[subdbs==""]
                     +    w <- which(subdbs=="" | is.na(subdbs))
                     +    if(length(w)>0)
                     +      subdbs[w] <- dbs[w]
                          db_ids <- paste(dbs, subdbs, sep="_")
                          return(db_ids)
+                     }
@@ -1018,7 +1030,8 @@ makeCollectionIDs <- function(sets) {
                      #' unlink(repo_path, TRUE)
                      #'
                      #' @export
                     -buildPEPs <- function(rp, geps, parallel=FALSE, collections="all",
                     +buildPEPs <- function(rp, geps, min_size=3, max_size=500,
                     +                      parallel=FALSE, collections="all",
                                            replace_existing=FALSE, progress_bar=TRUE,
                                            rawmode_id=NULL,
                                            rawmode_outdir=file.path(rp$root(), "raw"))
@@ -1046,15 +1059,19 @@ buildPEPs <- function(rp, geps, parallel=FALSE, collections="all",
                              if(length(oldpeps > 0)) {
                                  if(rawmode) {
                     -                say(paste0("Existing PEPs found, ",
                     +                say(paste0(length(oldpeps),
                     +                           " existing PEPs found, ",
                                                 "but this will be ignored in rawmode: ",
                                                 paste(oldpeps, collapse=", ")))
                                      newpeps <- colnames(geps)
                                  } else {
                     -                msg <- paste0("Existing PEPs will be replaced: ",
                     +                msg <- paste0(length(oldpeps),
                     +                              " existing PEPs will be skipped: ",
                                                    paste(oldpeps, collapse=", "))
                     -                if(!replace_existing)
                     -                    msg <- gsub("replaced", "skipped", msg)
                     +                if(replace_existing) {
                     +                    msg <- gsub("skipped", "replaced", msg)
                     +                    newpeps <- colnames(geps)
                     +                  }
                                      say(msg, type="warning")
+                                 }
+                             }
@@ -1062,7 +1079,8 @@ buildPEPs <- function(rp, geps, parallel=FALSE, collections="all",
                              if(length(newpeps) > 0) {
                                  gepsi <- geps[, newpeps, drop=FALSE]
                                  thisdb <- .loadCollection(rp, dbs[i])
                     -            peps <- gep2pep(gepsi, thisdb, parallel, progress_bar)
                     +            peps <- gep2pep(gepsi, thisdb, min_size, max_size,
                     +                            parallel, progress_bar)
                                  storePEPs(rp, dbs[i], peps, rawmode_id,
                                            rawmode_outdir)
+                             }
@@ -1229,11 +1247,14 @@ getDetails <- function(analysis, collection)
                              if(missing(subset))
                                  subset <- NULL
                              fname <- rp$get(coll)
                     -        data <- h5read(fname, "ES-PV", index=list(NULL, subset))
                              peps <- list(
                     -            ES = data[1:(nrow(data)/2),],
                     -            PV = data[(nrow(data)/2 + 1):nrow(data),]
                     -        )
                     +            ES = h5read(fname, "ES", index=list(NULL, subset)),
                     +            PV = h5read(fname, "PV", index=list(NULL, subset))
                     +        )
                     +        rownames(peps$ES) <- rownames(peps$PV) <-
                     +            h5read(fname, "rownames")
                     +        colnames(peps$ES) <- colnames(peps$PV) <-
                     +            h5read(fname, "colnames", index=list(subset))
                          } else {
                              peps <- list(
                                  ES = rp$get(coll)$ES[, subset, drop=F],
@@ -1307,7 +1328,7 @@ getDetails <- function(analysis, collection)
                      #'
                      #' @export
                      CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
                     -                    details=TRUE)
                     +                    details=TRUE, rankingFun = rankPEPsByRows.ES)
+                     {
                          dbs <- collections
                          if(length(dbs) == 1 && dbs=="all") {
@@ -1326,7 +1347,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
                          for(i in seq_along(dbs)) {
                              say(paste0("Working on collection: ", dbs[i]))
                     -        allperts <- .loadPerts(rp, dbs[i])
                     +        allperts <- .loadPerts(rp_peps, dbs[i])
                              if(length(bgset) == 1 && bgset=="all")
                                  bgset <- allperts
@@ -1346,7 +1367,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
                              peps <- .loadPEPs(rp_peps, dbs[i], rankingset)
                              say(paste0("Row-ranking collection"))
                     -        ranked <- rankPEPsByRows(peps)
                     +        ranked <- rankingFun(peps)
                              say(paste0("Computing enrichments"))
                              ks <- apply(ranked, 1, function(row) {
@@ -1453,7 +1474,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
                      #'
                      #' @export
                      PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
                     -                    details=TRUE)
                     +                    details=TRUE, rankingFun=rankPEPsByCols.SPV)
+                     {
                          checkSets(rp_peps, pathways)
@@ -1461,10 +1482,6 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
                          pathways <- pwList2pwStruct(pathways)
                     -    for(i in seq_along(pathways))
                     -        if(!rp_peps$has(names(pathways)[i]))
                     -            say("Cold not find PEPs: ", "error", names(pathways)[i])
+                    -
                          if(length(bgsets)==1 && bgsets != "all") {
                              checkSets(bgsets)
                              bgsets <- pwList2pwStruct(bgsets)
@@ -1481,7 +1498,13 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
                                  say("The following collections are not in the repository:",
                                      "error", offcols)
                          } else collections <- getCollections(rp_peps)
+                    -
+                    +
                     +    for(i in seq_along(pathways)) {
                     +        dbi <- names(pathways)[i]
                     +        if(dbi %in% collections && !rp_peps$has(dbi))
                     +            say("Could not find PEPs: ", "error", names(pathways)[i])
                     +        }
+                    +
                          collections <- intersect(names(pathways), collections)
                          if(length(setdiff(names(pathways), collections)>1)) {
@@ -1509,18 +1532,27 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
                                  say("Common pathway sets removed from bgset")
+                             }
                              rankingset <- c(gmd, bgset)
                     -        peps <- .loadPEPs(rp_peps$get, collections[i])
                     +        peps <- .loadPEPs(rp_peps, collections[i])
                              notok <- rankingset[rankingset %in% rownames(peps)]
                              if(length(notok)>0)
                                  say(paste0("Pathway set ids not found in ", collections[i], ": ",
                                             paste(notok, collapse=", ")), "error")
                              say(paste0("Column-ranking collection"))
                     -        ranked <- rankPEPsByCols(peps, rankingset)
                     +        ranked <- rankingFun(peps, rankingset)
                              say(paste0("Computing enrichments"))
                     -        ks <- apply(ranked, 2, function(col) ks.test.2(col[gmd], col[bgset]))
+                    -
                     +        ks <- apply(ranked, 2, function(col) {
                     +            inset <- col[gmd]
                     +            inset <- inset[!is.na(inset)]
                     +            outset <- col[bgset]
                     +            outset <- outset[!is.na(outset)]
                     +            if(length(inset)>1 && length(outset)>1) {
                     +                res <- ks.test.2(inset, outset, maxCombSize=10^10)
                     +            } else res <- list(ES=NA, p.value=NA)
                     +            res
                     +        })
+                    +
                              PVs <- sapply(ks, get, x="p.value")
                              sorter <- order(PVs)
@@ -1607,8 +1639,9 @@ gene2pathways <- function(rp, gene)
                      ##     }
                      ## }
                     -gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) {
+                    -
                     +gep2pep <- function(geps, sets, min_size, max_size, parallel=FALSE,
                     +                    pbar=TRUE) {
+                    +
                          pathw <- sets
                          genemat <- geps
                          genes <- rownames(genemat)
@@ -1631,8 +1664,10 @@ gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) {
+                             {
                                  where <- match(set, genes)
                                  where <- where[!is.na(where)]
                     -            gsea(where, genematj, FALSE)
                     +            gsea(where, genematj, min_size, max_size)
+                             }
                     +        if(all(is.na(sapply(gres, "get", x="ES"))))
                     +          say(paste0("All NAs in PEP for profile: ", colnames(genemat)[j]), "warning")
                              x[[j]] <- gres
+                         }
                          if(pbar) {
@@ -1656,13 +1691,15 @@ gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) {
+                     }
                     -gsea <- function(S, ranks_list, check=FALSE, alternative = "two.sided")
                     +gsea <- function(S, ranks_list, min_size, max_size, alternative = "two.sided")
+                     {
                          S <- S[!(is.na(S))]
                          S1 <- ranks_list[S]
                          S2 <- ranks_list[-S]
                     -    if(length(S1)<1 || length(S2)<1 || all(is.na(S1)) || all(is.na(S2)))
                     +    if(length(S1) < min_size || length(S1) > max_size ||
                     +       length(S2) < min_size ||
                     +       all(is.na(S1)) || all(is.na(S2)))
                              return(list(ES=NA, p=NA))
                          ks <- ks.test.2(S1, S2, alternative=alternative, maxCombSize=10^10)
@@ -1692,8 +1729,8 @@ storePEPs <- function(rp, db_id, peps, rawmode_suffix,
                              peps$ES <- cbind(curmat$ES, peps$ES[, newpeps, drop=FALSE])
                              peps$PV <- cbind(curmat$PV, peps$PV[, newpeps, drop=FALSE])
+                         }
+                    -
                     -    if(!rawmode) {
+                    +
                     +    if(!rawmode) {
                              say("Storing PEPs to the repository...")
                              rp$put(peps, db_id,
@@ -1763,7 +1800,7 @@ ks.test.2 <- function(x, y, ...) {
+                     }
                     -rankPEPsByCols <- function(peps, rankingset="all")
                     +rankPEPsByCols.SPV <- function(peps, rankingset="all")
+                     {
                          rankPEP <- function(PVs, ESs)
+                         {
@@ -1785,8 +1822,23 @@ rankPEPsByCols <- function(peps, rankingset="all")
                          return(x)
+                     }
                     +rankPEPsByCols.NES <- function(peps)
                     +{
                     +    ESs <- t(scale(t(peps$ES)))
                     +    attr(ESs, "scaled:center") <- NULL
                     +    attr(ESs, "scaled:scale") <- NULL
                     +    x <- apply(-ESs, 2, rank, ties.method = "random", na.last="keep")
                     +    return(x)
                     +}
+                    +
                     +rankPEPsByCols.ES <- function(peps)
                     +{
                     +    x <- apply(-peps$ES, 2, rank, ties.method = "random", na.last="keep")
                     +    return(x)
                     +}
+                    +
                     -rankPEPsByRows <- function(peps)
                     +rankPEPsByRows.ES <- function(peps)
+                     {
                          ESs <- peps[["ES"]]
                          x <- t(apply(-ESs, 1, rank, ties.method = "random", na.last="keep"))
@@ -1865,7 +1917,7 @@ checkGEPsFormat <- function(geps)
                          if(any(mins != 1) || any(maxs != dims[1]) || any(not_unique))
                              say(paste("GEP columns must be ranks. Check",
                     -                  "that each column is made of numbers from 1",
                     +                  "that each column is made of integer numbers from 1",
                                        "to the number of rows."), "error")
+                     }
@@ -1952,13 +2004,10 @@ convertFromGSetClass <- function(gsets) {
                      .makeCollectionIDs <- function(sets) {
                          dbs <- sapply(sets, get, x="category")
                          subdbs <- sapply(sets, get, x="subcategory")
                     -    subdbs[subdbs==""] <- dbs[subdbs==""]
                     +    w <- which(subdbs=="" | is.na(subdbs))
                     +    if(length(w)>0)
                     +      subdbs[w] <- dbs[w]
                          db_ids <- paste(dbs, subdbs, sep="_")
                          return(db_ids)
+                     }
                     -.extractWorkingPEPs <- function(rp, coll, fgset, bgset) {
                     -    ishdf5 <- "#rhdf5" %in% rp$tags(coll)
+                    -
+                    -
                     -}

man/buildPEPs.Rd

History View file @ 9072fb9

@@ -5,7 +5,8 @@
                      \title{Build PEPs from GEPs and store them in the repository.}
                      \usage{
                      buildPEPs(rp, geps, parallel = FALSE, collections = "all",
                     -  replace_existing = FALSE, progress_bar = TRUE)
                     +  replace_existing = FALSE, progress_bar = TRUE, rawmode_id = NULL,
                     +  rawmode_outdir = file.path(rp$root(), "raw"))
+                     }
                      \arguments{
                      \item{rp}{A repository created by \code{\link{createRepository}}.}
@@ -32,6 +33,15 @@ added. Either ways, will throw a warning.}
                      \item{progress_bar}{If set to TRUE (default) will show a progress
                      bar updated after conversion of each column of \code{geps}.}
+                    +
                     +\item{rawmode_id}{An integer to be appended to files produced in
                     +raw mode (see details). If set to NULL (default), raw mode is
                     +turned off.}
+                    +
                     +\item{rawmode_outdir}{A character vector specifying the destination
                     +path for files produced in raw mode (by default it is
                     +ROOT/raw, where ROOT is the root of the repository). Ignored if
                     +\code{rawmode_id} is NULL.}
+                     }
                      \value{
                      Nothing. The computed PEPs will be available in the
@@ -42,6 +52,18 @@ Given a matrix of ranked lists of genes (GEPs) and a \code{gep2pep}
                      repository, converts GEPs to PEPs and stores the latter in the
                      repository.
+                     }
                     +\details{
                     +By default, output is written to the repository as new
                     +    items named using the collection name. However, it is possible
                     +    to avoid the repository and write the output to regular files
                     +    turning 'raw mode' on through the \code{rawmode_id} and
                     +    \code{rawmode_outdir} parameters. This is particularly useful
                     +    when dealing with very large corpora of GEPs, and conversions
                     +    are split into independent jobs submitted to a scheduler. At
                     +    the end, the data will need to be reconstructed and put into
                     +    the repository using \code{importFromRawMode} in order to
                     +    perform \code{CondSEA} or \code{PathSEA} analysis.
                     +}
                      \examples{
                      db <- loadSamplePWS()
                      db <- as.CategorizedCollection(db)

tests/testthat.R

History View file @ 9072fb9

@@ -2,3 +2,4 @@ library(testthat)
                      library(gep2pep)
                      test_check("gep2pep")
                     +test_check("rawMode")

tests/testthat/test_gep2pep.R

History View file @ 9072fb9

@@ -1,11 +1,12 @@
                     -## Workflow:
                     +## ## Workflow:
                      ## library(GSEABase)
                      ## library(devtools)
                      ## library(testthat)
                      ## load_all()
                     +loadPEPs <- gep2pep::.loadPEPs
                      dbfolder <- file.path(tempdir(), "gep2pepDB")
@@ -77,7 +78,8 @@ test_that("new db creation", {
                      context("creation of peps")
                     -suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE))
                     +suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE,
                     +                           min_size=3))
                      test_that("build first PEPs", {
                        expect_equal(length(rp$entries()), 8)
@@ -95,7 +97,7 @@ test_that("build first PEPs", {
                        expect_equal(ncol(rp$get(expected_dbs[1])[[2]]), ncol(testgep))
                        expect_equal(ncol(rp$get(expected_dbs[3])[[1]]), ncol(testgep))
                        expect_equal(ncol(rp$get(expected_dbs[3])[[2]]), ncol(testgep))
                     -  expect_failure(expect_warning(suppressMessages(checkRepository(rp))))
                     +  expect_failure(expect_warning(suppressMessages(checkRepository(rp))))
                        expect_error(loadESmatrix(rp, "random name"))
                        expect_equal(loadESmatrix(rp, "c3_TFT"), rp$get("c3_TFT")$ES)
                        expect_error(loadPVmatrix(rp, "random name"))
@@ -103,60 +105,6 @@ test_that("build first PEPs", {
                      })
                     -context("creation of RAW peps")
+                    -
                     -suppressMessages(
                     -    buildPEPs(rp, testgep[,1:2], progress_bar=FALSE,
                     -              rawmode_id=1)
                     -)
                     -suppressMessages(
                     -    buildPEPs(rp, testgep[,3:5], progress_bar=FALSE,
                     -              rawmode_id=2)
                     -)
+                    -
                     -outfiles1 <- paste0(getCollections(rp), "#1.RDS")
                     -outfiles2 <- paste0(getCollections(rp), "#2.RDS")
                     -outfiles <- c(outfiles1, outfiles2)
                     -outdir <- file.path(rp$root(), "raw")
                     -f1 <- readRDS(paste0(file.path(outdir, outfiles[1])))
+                    -
                     -test_that("build hdf5 PEPs", {
                     -    expect_true(all(sapply(outfiles, `%in%`, list.files(outdir))))
                     -    expect_equal(f1$ES[,1], rp$get("c3_TFT")$ES[,1])
                     -    expect_equal(f1$PV[,1], rp$get("c3_TFT")$PV[,1])
                     -    expect_equal(f1$ES[,2], rp$get("c3_TFT")$ES[,2])
                     -    expect_equal(f1$PV[,2], rp$get("c3_TFT")$PV[,2])
                     -})
+                    -
+                    -
                     -colls <- getCollections(rp)
+                    -
                     -oldpep2 <- rp$get(colls[2])
                     -rp$rm(tags="pep", force=T)
                     -importFromRawMode(rp)
+                    -
                     -pep2 <- gep2pep::.loadPEPs(rp, colls[2])
+                    -
                     -rownames(pep2$ES) <- rownames(pep2$PV) <- rp$get(colls[2])
                     -colnames(pep2$ES) <- colnames(pep2$PV) <- rp$get(colls[2])
+                    -
                     -## rownames(pep2$ES)
                     -##  [1] "M7785"  "M6394"  "M18759" "M10635" "M14709" "M4820"  "M7677"  "M11751"
                     -##  [9] "M10105" "M5012"
                     -## rownames(oldpep2$ES)
                     -##    M7785    M6394   M18759   M10635   M14709    M4820    M7677   M11751
                     -##  "M7785"  "M6394" "M18759" "M10635" "M14709"  "M4820"  "M7677" "M11751"
                     -##   M10105    M5012
                     -## "M10105"  "M5012"
+                    -
                     -test_that("check hdf5 PEPss", {
                     -    expect_true(all(oldpep2$ES==pep2$ES))
                     -    expect_true(all(oldpep2$PV==pep2$PV))
                     -    expect_true(all(rownames(oldpep2$ES) == rownames(pep2$ES)))
                     -    expect_true(all(rownames(oldpep2$PV) == rownames(pep2$PV)))
                     -    expect_true(all(colnames(oldpep2$ES) == colnames(pep2$ES)))
                     -    expect_true(all(colnames(oldpep2$PV) == colnames(pep2$PV)))
                     -})
                      res <- list()
                      for(i in 1:3) {
@@ -166,29 +114,30 @@ for(i in 1:3) {
                        id <- testpws_old[[testi]]$id
                        tomatch <- intersect(rownames(testgep), set)
                        inset <- testgep[match(tomatch, rownames(testgep)), testj]
                     -  ks <- ks.test.2(inset, (1:nrow(testgep))[-inset], maxCombSize=10^10)
                     +  if(length(tomatch) >= 3) {
                     +      ks <- ks.test.2(inset, (1:nrow(testgep))[-inset], maxCombSize=10^10)
                     +  } else ks <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
                        dbi <- dbs[testi]
                        res[[i]] <- list(id=id, testj=testj, ks=ks, dbi=dbi)
+                     }
+                    -
                      test_that("KS statistics", {
                        i <- 1
                        id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi
                     -  expect_equal(rp$get(dbi)$ES[id, testj], ks$ES)
                     -  expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value)
                     +  expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES)
                     +  expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value)
                        i <- 2
                        id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi
                     -  expect_equal(rp$get(dbi)$ES[id, testj], ks$ES)
                     -  expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value)
                     +  expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES)
                     +  expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value)
                        i <- 3
                        id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi
                     -  expect_equal(rp$get(dbi)$ES[id, testj], ks$ES)
                     -  expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value)
                     +  expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES)
                     +  expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value)
                      })
                      context("adding existing peps")
                     -oldTFT <- rp$get("c3_TFT")
                     +oldTFT <- loadPEPs(rp, "c3_TFT")
                      test_that("Adding PEPs", {
                          expect_warning(
                              suppressMessages(buildPEPs(rp, testgep[, 1:3], progress_bar=FALSE))
@@ -196,7 +145,7 @@ test_that("Adding PEPs", {
                          expect_failure(expect_warning(suppressMessages(checkRepository(rp))))
                      })
                     -untouchedTFT <- rp$get("c3_TFT")
                     +untouchedTFT <- oldTFT
                      subs <- c(2,4,5)
                      smallTFT <- list(ES=oldTFT$ES[, subs],
@@ -205,7 +154,7 @@ smallTFT <- list(ES=oldTFT$ES[, subs],
                      ## the "perturbagens" item
                      rp$set("c3_TFT", smallTFT)
                     -rebuiltTFT <- rp$get("c3_TFT")
                     +rebuiltTFT <- loadPEPs(rp, "c3_TFT")
                      test_that("Adding PEPs", {
                          expect_warning(
                              suppressMessages(buildPEPs(rp, testgep[, 1:3], progress_bar=FALSE))
@@ -216,7 +165,6 @@ test_that("Adding PEPs", {
                          expect_warning(suppressMessages(checkRepository(rp)))
                      })
+                    -
                      rp$set("c3_TFT", oldTFT)
@@ -229,8 +177,8 @@ for(i in 1:ncol(testgep))
+                         )
                      test_that("adding one by one", {
                     -    expect_true(identical(rp2$get("c3_TFT"), rp$get("c3_TFT")))
                     -    expect_failure(expect_warning(suppressMessages(checkRepository(rp2))))
                     +    ##expect_true(identical(rp2$get("c3_TFT"), rp$get("c3_TFT")))
                     +    expect_failure(expect_warning(suppressMessages(checkRepository(rp2))))
                      })
@@ -242,20 +190,22 @@ peps3 <- rp$get(expected_dbs[3])
                      es1 <- peps1$ES
                      es3 <- peps3$ES
                     -RowRanked1 <- rankPEPsByRows(peps1)
                     -RowRanked3 <- rankPEPsByRows(peps3)
                     +RowRanked1 <- rankPEPsByRows.ES(peps1)
                     +nas1 <- which(is.na(RowRanked1[,1]))
                     +RowRanked3 <- rankPEPsByRows.ES(peps3)
                     +nas3 <- which(is.na(RowRanked3[,3]))
                      test_that("Row ranking", {
                     -    expect_true(all(apply(RowRanked1, 1, setequal, 1:5)))
                     -    expect_true(all(apply(RowRanked3, 1, setequal, 1:5)))
                     +    expect_true(all(apply(RowRanked1[-nas1,], 1, setequal, 1:5)))
                     +    expect_true(all(apply(RowRanked3[-nas3,], 1, setequal, 1:5)))
                          expect_equal(RowRanked1[1,1], 5)
                          expect_equal(RowRanked1[4,3], 1)
                          expect_equal(RowRanked3[5,4], 5)
                          expect_equal(RowRanked3[8,3], 1)
                      })
                     -ColRanked1 <- rankPEPsByCols(peps1)
                     -ColRanked3 <- rankPEPsByCols(peps3)
                     +ColRanked1 <- rankPEPsByCols.SPV(peps1)
                     +ColRanked3 <- rankPEPsByCols.SPV(peps3)
                      randj <- sample(ncol(ColRanked3),1)
                      PVs <- peps1$PV[,randj]
@@ -265,30 +215,44 @@ if(any(ESs<0)) {
                          lastid <- which.min(PVs)
                      } else lastid <- which.max(PVs)
                     -test_that("Column ranking", {
                     -    expect_true(all(apply(ColRanked1, 2, setequal, 1:10)))
                     -    expect_true(all(apply(ColRanked3, 2, setequal, 1:10)))
                     +test_that("Column ranking with SPV", {
                     +    expect_true(all(apply(ColRanked1[-nas1,], 2, setequal, 1:9)))
                     +    expect_true(all(apply(ColRanked3[-nas3,], 2, setequal, 1:9)))
                          expect_equal(ColRanked1[2,1], 1)
                     -    expect_equal(ColRanked1[1,1], 10)
                     +    expect_equal(ColRanked1[1,1], 9)
                          expect_equal(ColRanked3[8,3], 1)
                     -    expect_equal(ColRanked3[10,3], 10)
                     -    expect_equal(ColRanked3[4,5], 10)
                     -    expect_equal(ColRanked1[lastid, randj], 10)
                     +    expect_equal(ColRanked3[10,3], 9)
                     +    expect_equal(ColRanked3[4,5], 9)
                     +    expect_equal(ColRanked1[lastid, randj], 9)
                      })
                     -context("CondSEA")
                     +ColRanked3 <- rankPEPsByCols.NES(peps3)
                     +## Reference implementation: standardise every row of the ES matrix
                     +## (scale() works on the columns of the transposed matrix), then rank
                     +## each column in decreasing order, leaving NAs in place.
                     +manual <- t(scale(t(peps3$ES)))
                     +## Logical mask rather than manual[-nas3,]: negative indexing with an
                     +## empty nas3 would select zero rows and make every check vacuous.
                     +ranked_rows <- !(seq_len(nrow(manual)) %in% nas3)
                     +## Two-sided sanity check: each standardised row must have unit sd
                     +## (the one-sided form would also accept any sd below 1).
                     +row_sds <- apply(manual[ranked_rows, , drop=FALSE], 1, sd)
                     +stopifnot(all(abs(row_sds - 1) < 1e-12))
                     +manualR <- apply(-manual, 2, rank, na.last="keep")
                     +test_that("Column ranking with NES", {
                     +    expect_true(all(ColRanked3[ranked_rows,]==manualR[ranked_rows,]))
                     +    expect_true(all(is.na(manualR[nas3,])))
                     +})
+                    +
+                    +
                     +context("CondSEA")
                      pgset <- c("(+)_chelidonine",  "(+/_)_catechin")
                      res <- suppressMessages(CondSEA(rp, pgset))
                      randi <- sample(1:length(testpws), 1)
                      pwsid <- testpws_old[[randi]]$id
                      randDB <- dbs[randi]
                     -ranked <- rankPEPsByRows(rp$get(randDB))
                     +ranked <- rankPEPsByRows.ES(rp$get(randDB))
                      inset <- ranked[pwsid, pgset]
                      outset <- ranked[pwsid, setdiff(colnames(ranked), pgset)]
                     -ks <- ks.test.2(inset, outset)
+                    -
                     +if(length(inset[!is.na(inset)])>0 &&
                     +   length(outset[!is.na(outset)])>0) {
                     +    ks <- ks.test.2(inset, outset, maxCombSize=10^10)
                     +} else {
                     +    ks <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
                     +}
                      test_that("CondSEA", {
                          expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]])
                          expect_equal(getResults(res, "c3_TFT"), res$CondSEA[["c3_TFT"]])
@@ -303,33 +267,42 @@ context("PathSEA")
                      db1 <- expected_dbs[1]
                      db3 <- expected_dbs[3]
+                    -
                      pws1 <- sapply(testpws[makeCollectionIDs(testpws)==db1][c(2,5,6,9)], setName)
                      pws3 <- sapply(testpws[makeCollectionIDs(testpws)==db3][c(1,3,10)], setName)
                      res <- suppressMessages(PathSEA(rp, testpws[c(pws1, pws3)]))
                      setids1 <- sapply(testpws[pws1], setIdentifier)
                      setids3 <- sapply(testpws[pws3], setIdentifier)
+                    -
                      randj1 <- sample(1:ncol(testgep), 1)
                     -ranked <- rankPEPsByCols(rp$get(db1))
                     +ranked <- rankPEPsByCols.SPV(rp$get(db1))
                      peps <- rp$get(db1)
                      inset <- ranked[setids1, randj1]
                      outset <- ranked[setdiff(rownames(ranked), setids1), randj1]
                     -ks1 <- ks.test.2(inset, outset)
+                    -
                     +inset <- inset[!is.na(inset)]
                     +outset <- outset[!is.na(outset)]
                     +if(length(inset)>0 &&
                     +   length(outset)>0) {
                     +    ks1 <- ks.test.2(inset, outset, maxCombSize=10^10)
                     +} else {
                     +    ks1 <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
                     +}
                      randj3 <- sample(1:ncol(testgep), 1)
                     -ranked <- rankPEPsByCols(rp$get(db3))
                     +ranked <- rankPEPsByCols.SPV(rp$get(db3))
                      peps <- rp$get(db3)
                      inset <- ranked[setids3, randj3]
                      outset <- ranked[setdiff(rownames(ranked), setids3), randj3]
                     -ks3 <- ks.test.2(inset, outset)
+                    -
                     +inset <- inset[!is.na(inset)]
                     +outset <- outset[!is.na(outset)]
                     +if(length(inset)>0 &&
                     +   length(outset)>0) {
                     +    ks3 <- ks.test.2(inset, outset, maxCombSize=10^10)
                     +} else {
                     +    ks3 <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
                     +}
                      name1 <- colnames(testgep)[randj1]
                      name3 <- colnames(testgep)[randj3]
+                    -
                      test_that("PathSEA", {
                     -    expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]])
                     -    expect_equal(getResults(res, "c3_TFT"), res$PathSEA[["c3_TFT"]])
                     +    expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]])
                     +    expect_equal(getResults(res, "c3_TFT"), res$PathSEA[["c3_TFT"]])
                          expect_equal(unname(res[["PathSEA"]][[db1]][name1, "ES"]),
                                       ks1$ES)
                          expect_equal(unname(res[["PathSEA"]][[db1]][name1, "PV"]),
@@ -341,7 +314,6 @@ test_that("PathSEA", {
                      })
+                    -
                      ## A gene that is found in at least 3 pathways:
                      gene <- intersect(intersect(geneIds(testpws[[3]]), geneIds(testpws[[4]])),
                                        geneIds(testpws[[7]]))[1]

tests/testthat/test_rawMode.R

History View file @ 9072fb9

                     new file mode 100644
@@ -0,0 +1,86 @@
+                    +
+                    +
                     +## ## Workflow:
                     +## library(GSEABase)
                     +## library(devtools)
                     +## library(testthat)
                     +## load_all()
+                    +
                     +loadPEPs <- gep2pep::.loadPEPs
+                    +
                     +dbfolder <- file.path(tempdir(), "gep2pepDB")
+                    +
                     +## Delete the on-disk test repository, if it exists.
                     +## `suffix` is appended to `dbfolder` to form the path; with the
                     +## default NULL, paste0() returns `dbfolder` unchanged.
                     +clear_test_repo <- function(suffix=NULL) {
                     +    folder <- paste0(dbfolder, suffix)
                     +    ## recursive=TRUE is spelled out: `T` is an ordinary variable that
                     +    ## can be reassigned, and the positional form hides the intent.
                     +    if(file.exists(folder))
                     +        unlink(folder, recursive=TRUE)
                     +}
+                    +
                     +## Wipe any previous repository at `dbfolder` + `suffix`, then create
                     +## a new one from `testpws` with messages silenced. Returns whatever
                     +## createRepository() returns.
                     +create_test_repo <- function(suffix=NULL) {
                     +    clear_test_repo(suffix)
                     +    target <- paste0(dbfolder, suffix)
                     +    repo <- suppressMessages(createRepository(target, testpws))
                     +    repo
                     +}
+                    +
                     +## Sample inputs shipped with the package: expression profiles and
                     +## pathway sets (the latter also converted with the internal
                     +## convertFromGSetClass helper).
                     +testgep <- loadSampleGEP()
                     +testpws <- as.CategorizedCollection(loadSamplePWS())
                     +testpws_old <- gep2pep:::convertFromGSetClass(testpws)
+                    +
                     +## Collection identifiers, the repository under test, and the
                     +## collection names the tests refer to.
                     +dbs <- makeCollectionIDs(testpws)
                     +rp <- create_test_repo()
                     +expected_dbs <- c("c3_TFT", "c3_MIR", "c4_CGN")
+                    +
                     +## Build PEPs for all sample profiles in the regular (non-raw) mode.
                     +suppressMessages(
                     +    buildPEPs(rp, testgep, progress_bar=FALSE)
                     +)
+                    +
                     +context("creation of RAW peps")
+                    +
                     +## Build the same profiles again in raw mode, split into two chunks
                     +## (columns 1-2 with id 1, columns 3-5 with id 2).
                     +suppressMessages(buildPEPs(rp, testgep[,1:2],
                     +                           progress_bar=FALSE, rawmode_id=1))
                     +suppressMessages(buildPEPs(rp, testgep[,3:5],
                     +                           progress_bar=FALSE, rawmode_id=2))
+                    +
                     +## File names checked for below: one "<collection>#<id>.RDS" per
                     +## collection and chunk, looked up in the "raw" folder under the
                     +## repository root.
                     +outdir <- file.path(rp$root(), "raw")
                     +outfiles1 <- paste0(getCollections(rp), "#1.RDS")
                     +outfiles2 <- paste0(getCollections(rp), "#2.RDS")
                     +outfiles <- c(outfiles1, outfiles2)
                     +## Content of the first expected file (first collection, chunk 1).
                     +f1 <- readRDS(file.path(outdir, outfiles[1]))
+                    +
                     +## Every expected raw file exists, and the first raw chunk matches the
                     +## first two columns of the "c3_TFT" item stored in the repository.
                     +test_that("build hdf5 PEPs", {
                     +    stored <- rp$get("c3_TFT")
                     +    expect_true(all(outfiles %in% list.files(outdir)))
                     +    for(j in c(1, 2)) {
                     +        expect_equal(f1$ES[,j], stored$ES[,j])
                     +        expect_equal(f1$PV[,j], stored$PV[,j])
                     +    }
                     +})
+                    +
+                    +
                     +colls <- getCollections(rp)
+                    +
                     +## Keep a copy of the second collection's PEPs, remove the items
                     +## tagged "pep" from the repository, then rebuild them from the
                     +## raw-mode files.
                     +oldpep2 <- rp$get(colls[2])
                     +rp$rm(tags="pep", force=TRUE)
                     +importFromRawMode(rp)
+                    +
                     +## Re-imported PEPs; `w` flags the rows whose first-column ES is NA.
                     +pep2 <- loadPEPs(rp, colls[2])
                     +w <- is.na(pep2$ES[,1])
+                    +
                     +## The re-imported PEPs must reproduce the original values on the
                     +## rows not flagged by `w`, and carry the same row and column names.
                     +test_that("check hdf5 PEPss", {
                     +    keep <- !w
                     +    old_es <- oldpep2$ES
                     +    old_pv <- oldpep2$PV
                     +    new_es <- pep2$ES
                     +    new_pv <- pep2$PV
                     +    expect_true(all(old_es[keep,]==new_es[keep,]))
                     +    expect_true(all(old_pv[keep,]==new_pv[keep,]))
                     +    expect_true(all(is.na(old_pv[w,])))
                     +    expect_true(all(rownames(old_es)==rownames(new_es)))
                     +    expect_true(all(rownames(old_pv)==rownames(new_pv)))
                     +    expect_true(all(colnames(old_es)==colnames(new_es)))
                     +    expect_true(all(colnames(old_pv)==colnames(new_pv)))
                     +})
+                    +