Browse code

added organism option for importMSigDB.xml, added minsize and maxsize for computing KS statistic, changed rawmode datasets, added PEPs ranking function as a parameter, minor bug fixes

Ciccio authored on 19/01/2018 11:42:44
Showing 7 changed files

... ...
@@ -1,7 +1,7 @@
1 1
 Package: gep2pep
2 2
 Type: Package
3 3
 Title: Creation and Analysis of Pathway Expression Profiles (PEPs)
4
-Version: 0.99.2.3
4
+Version: 0.99.2.6
5 5
 Date: 2017-09-11
6 6
 Author: Francesco Napolitano <franapoli@gmail.com>
7 7
 Maintainer: Francesco Napolitano <franapoli@gmail.com>
... ...
@@ -12,6 +12,7 @@ export(gene2pathways)
12 12
 export(getCollections)
13 13
 export(getDetails)
14 14
 export(getResults)
15
+export(importFromRawMode)
15 16
 export(importMSigDB.xml)
16 17
 export(loadCollection)
17 18
 export(loadESmatrix)
... ...
@@ -342,7 +342,7 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
342 342
       
343 343
       say(paste0("Working on collection: ", dbi))
344 344
 
345
-      say(paste0("Creatiing a repository entry."))
345
+      say(paste0("Creating a repository entry."))
346 346
       fl <- tempfile()
347 347
       h5createFile(fl)
348 348
       rp$put(fl, dbi, asattach=T, tags=c("pep", "#hdf5"))
... ...
@@ -352,13 +352,13 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
352 352
       x <- readRDS(file.path(path, fname))$ES
353 353
       Nrow <- nrow(x)
354 354
       say(paste0("Creating 2 HDF5 dataset of size: ", Nrow, "x", Ncol))
355
-      ## h5createDataset(fl, "ES-PV", c(Nrow*2, Ncol), chunk=c(Nrow*2, Nchunk))
355
+
356 356
       h5createDataset(fl, "ES", c(Nrow, Ncol), chunk=c(Nrow, Nchunk))
357 357
       h5createDataset(fl, "PV", c(Nrow, Ncol), chunk=c(Nrow, Nchunk))
358 358
       h5createDataset(fl, "rownames", Nrow, storage.mode="character", size=256,
359 359
                       chunk=Nrow)
360 360
       h5createDataset(fl, "colnames", Ncol, storage.mode="character", size=256,
361
-                      chunk=Ncol)
361
+                      chunk=Nchunk)
362 362
       h5write(rownames(x), fl, "rownames")      
363 363
        
364 364
       say("Adding chunks...")
... ...
@@ -370,8 +370,7 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
370 370
           x <- readRDS(ifile)
371 371
           ifsize <- utils:::format.object_size(file.size(ifile), "auto")
372 372
           startCol <- (j-1)*Nchunk+1
373
-          ## h5write(rbind(x$ES, x$PV), fl, "ES-PV", start=c(1,startCol),
374
-          ##         createnewfile=F)
373
+
375 374
           h5write(x$ES, fl, "ES", start=c(1,startCol),
376 375
                   createnewfile=F)
377 376
           h5write(x$PV, fl, "PV", start=c(1,startCol),
... ...
@@ -429,12 +428,19 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"),
429 428
 #' ## removing temporary repository
430 429
 #' unlink(repo_path, TRUE)
431 430
 #' @export
432
-importMSigDB.xml <- function(fname) {
431
+importMSigDB.xml <- function(fname, organism="Homo Sapiens") {
433 432
 
434 433
     say("Loading gene sets...")
435 434
 
436 435
     result = tryCatch({
437 436
         sets <- getBroadSets(fname)
437
+        say(paste0("Loaded ", length(sets), " sets."))
438
+        orgs <- sapply(sets, function(s) attributes(s)$organism)        
439
+        if(organism != "all") {
440
+            w <- tolower(orgs) == tolower(organism)
441
+            sets <- sets[w]
442
+            say(paste0("Selected ", length(sets), " sets for: ", organism))
443
+        }
438 444
         say("Converting gene sets...")
439 445
         as.CategorizedCollection(sets)    
440 446
     }, error = function(e) {        
... ...
@@ -466,18 +472,22 @@ importMSigDB.xml <- function(fname) {
466 472
 
467 473
         say("Converting gene sets...")
468 474
         gs <- list()
475
+        k <- 1
469 476
         for(i in seq_len(nrow(msigDB))) {
470
-            gs[[i]] <- GeneSet(strsplit(msigDB$set[i], ",")[[1]],
471
-                               shortDescription = msigDB$desc[i],
472
-                               longDescription = msigDB$desc_full[i],
473
-                               setName = msigDB$name[i],
474
-                               setIdentifier = msigDB$id[i],
475
-                               organism = msigDB$organism[i],
476
-                               collectionType = CategorizedCollection(
477
-                                   category=msigDB$category[i],
478
-                                   subCategory=msigDB$subcategory[i]
479
-                               ))
477
+            if(tolower(msigDB[[i]]$organism) == organism) {
478
+                gs[[i]] <- GeneSet(strsplit(msigDB$set[i], ",")[[1]],
479
+                                   shortDescription = msigDB$desc[i],
480
+                                   longDescription = msigDB$desc_full[i],
481
+                                   setName = msigDB$name[i],
482
+                                   setIdentifier = msigDB$id[i],
483
+                                   organism = msigDB$organism[i],
484
+                                   collectionType = CategorizedCollection(
485
+                                       category=msigDB$category[i],
486
+                                       subCategory=msigDB$subcategory[i]
487
+                                   ))
488
+                k <- k+1
480 489
             }
490
+        }
481 491
         GeneSetCollection(gs)
482 492
     })
483 493
 
... ...
@@ -929,7 +939,9 @@ makeCollectionIDs <- function(sets) {
929 939
         
930 940
     dbs <- sapply(sets, get, x="category")
931 941
     subdbs <- sapply(sets, get, x="subcategory")
932
-    subdbs[subdbs==""] <- dbs[subdbs==""]
942
+    w <- which(subdbs=="" | is.na(subdbs)) 
943
+    if(length(w)>0)
944
+      subdbs[w] <- dbs[w]
933 945
     db_ids <- paste(dbs, subdbs, sep="_")
934 946
     return(db_ids)
935 947
 }
... ...
@@ -1018,7 +1030,8 @@ makeCollectionIDs <- function(sets) {
1018 1030
 #' unlink(repo_path, TRUE)
1019 1031
 #'
1020 1032
 #' @export
1021
-buildPEPs <- function(rp, geps, parallel=FALSE, collections="all",
1033
+buildPEPs <- function(rp, geps, min_size=3, max_size=500,
1034
+                      parallel=FALSE, collections="all",
1022 1035
                       replace_existing=FALSE, progress_bar=TRUE,
1023 1036
                       rawmode_id=NULL,
1024 1037
                       rawmode_outdir=file.path(rp$root(), "raw"))
... ...
@@ -1046,15 +1059,19 @@ buildPEPs <- function(rp, geps, parallel=FALSE, collections="all",
1046 1059
 
1047 1060
         if(length(oldpeps > 0)) {
1048 1061
             if(rawmode) {
1049
-                say(paste0("Existing PEPs found, ",
1062
+                say(paste0(length(oldpeps),
1063
+                           " existing PEPs found, ",
1050 1064
                            "but this will be ignored in rawmode: ",
1051 1065
                            paste(oldpeps, collapse=", ")))
1052 1066
                 newpeps <- colnames(geps)
1053 1067
             } else {
1054
-                msg <- paste0("Existing PEPs will be replaced: ",
1068
+                msg <- paste0(length(oldpeps),
1069
+                              " existing PEPs will be skipped: ",
1055 1070
                               paste(oldpeps, collapse=", "))
1056
-                if(!replace_existing)
1057
-                    msg <- gsub("replaced", "skipped", msg)
1071
+                if(replace_existing) {
1072
+                    msg <- gsub("skipped", "replaced", msg)
1073
+                    newpeps <- colnames(geps)
1074
+                  }
1058 1075
                 say(msg, type="warning")
1059 1076
             }
1060 1077
         }
... ...
@@ -1062,7 +1079,8 @@ buildPEPs <- function(rp, geps, parallel=FALSE, collections="all",
1062 1079
         if(length(newpeps) > 0) {
1063 1080
             gepsi <- geps[, newpeps, drop=FALSE]
1064 1081
             thisdb <- .loadCollection(rp, dbs[i])
1065
-            peps <- gep2pep(gepsi, thisdb, parallel, progress_bar)
1082
+            peps <- gep2pep(gepsi, thisdb, min_size, max_size,
1083
+                            parallel, progress_bar)
1066 1084
             storePEPs(rp, dbs[i], peps, rawmode_id,
1067 1085
                       rawmode_outdir)
1068 1086
         }
... ...
@@ -1229,11 +1247,14 @@ getDetails <- function(analysis, collection)
1229 1247
         if(missing(subset))
1230 1248
             subset <- NULL
1231 1249
         fname <- rp$get(coll)
1232
-        data <- h5read(fname, "ES-PV", index=list(NULL, subset))
1233 1250
         peps <- list(
1234
-            ES = data[1:(nrow(data)/2),],
1235
-            PV = data[(nrow(data)/2 + 1):nrow(data),]
1236
-        )        
1251
+            ES = h5read(fname, "ES", index=list(NULL, subset)),
1252
+            PV = h5read(fname, "PV", index=list(NULL, subset))
1253
+        )
1254
+        rownames(peps$ES) <- rownames(peps$PV) <-
1255
+            h5read(fname, "rownames")
1256
+        colnames(peps$ES) <- colnames(peps$PV) <-
1257
+            h5read(fname, "colnames", index=list(subset))
1237 1258
     } else {
1238 1259
         peps <- list(
1239 1260
             ES = rp$get(coll)$ES[, subset, drop=F],
... ...
@@ -1307,7 +1328,7 @@ getDetails <- function(analysis, collection)
1307 1328
 #'
1308 1329
 #' @export
1309 1330
 CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
1310
-                    details=TRUE)
1331
+                    details=TRUE, rankingFun = rankPEPsByRows.ES)
1311 1332
 {
1312 1333
     dbs <- collections
1313 1334
     if(length(dbs) == 1 && dbs=="all") {
... ...
@@ -1326,7 +1347,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
1326 1347
     for(i in seq_along(dbs)) {        
1327 1348
         say(paste0("Working on collection: ", dbs[i]))
1328 1349
 
1329
-        allperts <- .loadPerts(rp, dbs[i])
1350
+        allperts <- .loadPerts(rp_peps, dbs[i])
1330 1351
 
1331 1352
         if(length(bgset) == 1 && bgset=="all")
1332 1353
             bgset <- allperts
... ...
@@ -1346,7 +1367,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
1346 1367
 
1347 1368
         peps <- .loadPEPs(rp_peps, dbs[i], rankingset)
1348 1369
         say(paste0("Row-ranking collection"))
1349
-        ranked <- rankPEPsByRows(peps)
1370
+        ranked <- rankingFun(peps)
1350 1371
         say(paste0("Computing enrichments"))
1351 1372
         
1352 1373
         ks <- apply(ranked, 1, function(row) {
... ...
@@ -1453,7 +1474,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all",
1453 1474
 #'
1454 1475
 #' @export
1455 1476
 PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
1456
-                    details=TRUE)
1477
+                    details=TRUE, rankingFun=rankPEPsByCols.SPV)
1457 1478
 {
1458 1479
     checkSets(rp_peps, pathways)
1459 1480
     
... ...
@@ -1461,10 +1482,6 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
1461 1482
     
1462 1483
     pathways <- pwList2pwStruct(pathways)
1463 1484
 
1464
-    for(i in seq_along(pathways))
1465
-        if(!rp_peps$has(names(pathways)[i]))
1466
-            say("Cold not find PEPs: ", "error", names(pathways)[i])
1467
-
1468 1485
     if(length(bgsets)==1 && bgsets != "all") {
1469 1486
         checkSets(bgsets)
1470 1487
         bgsets <- pwList2pwStruct(bgsets)
... ...
@@ -1481,7 +1498,13 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
1481 1498
             say("The following collections are not in the repository:",
1482 1499
                 "error", offcols)
1483 1500
     } else collections <- getCollections(rp_peps)
1484
-            
1501
+
1502
+    for(i in seq_along(pathways)) {
1503
+        dbi <- names(pathways)[i]
1504
+        if(dbi %in% collections && !rp_peps$has(dbi))
1505
+            say("Could not find PEPs: ", "error", names(pathways)[i])
1506
+        }
1507
+    
1485 1508
     collections <- intersect(names(pathways), collections)
1486 1509
     
1487 1510
     if(length(setdiff(names(pathways), collections)>1)) {
... ...
@@ -1509,18 +1532,27 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all",
1509 1532
             say("Common pathway sets removed from bgset")
1510 1533
         }
1511 1534
         rankingset <- c(gmd, bgset)
1512
-        peps <- .loadPEPs(rp_peps$get, collections[i])
1535
+        peps <- .loadPEPs(rp_peps, collections[i])
1513 1536
         notok <- rankingset[rankingset %in% rownames(peps)]
1514 1537
         if(length(notok)>0)
1515 1538
             say(paste0("Pathway set ids not found in ", collections[i], ": ",
1516 1539
                        paste(notok, collapse=", ")), "error")
1517 1540
 
1518 1541
         say(paste0("Column-ranking collection"))
1519
-        ranked <- rankPEPsByCols(peps, rankingset)
1542
+        ranked <- rankingFun(peps, rankingset)
1520 1543
         say(paste0("Computing enrichments"))
1521 1544
 
1522
-        ks <- apply(ranked, 2, function(col) ks.test.2(col[gmd], col[bgset]))
1523
-
1545
+        ks <- apply(ranked, 2, function(col) {
1546
+            inset <- col[gmd]
1547
+            inset <- inset[!is.na(inset)]
1548
+            outset <- col[bgset]
1549
+            outset <- outset[!is.na(outset)]
1550
+            if(length(inset)>1 && length(outset)>1) {
1551
+                res <- ks.test.2(inset, outset, maxCombSize=10^10)
1552
+            } else res <- list(ES=NA, p.value=NA)
1553
+            res
1554
+        })
1555
+        
1524 1556
         PVs <- sapply(ks, get, x="p.value")
1525 1557
         sorter <- order(PVs)
1526 1558
         
... ...
@@ -1607,8 +1639,9 @@ gene2pathways <- function(rp, gene)
1607 1639
 ##     }       
1608 1640
 ## }
1609 1641
 
1610
-gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) {
1611
-
1642
+gep2pep <- function(geps, sets, min_size, max_size, parallel=FALSE,
1643
+                    pbar=TRUE) {
1644
+  
1612 1645
     pathw <- sets
1613 1646
     genemat <- geps
1614 1647
     genes <- rownames(genemat)
... ...
@@ -1631,8 +1664,10 @@ gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) {
1631 1664
         {
1632 1665
             where <- match(set, genes)
1633 1666
             where <- where[!is.na(where)]
1634
-            gsea(where, genematj, FALSE)
1667
+            gsea(where, genematj, min_size, max_size)
1635 1668
         }
1669
+        if(all(is.na(sapply(gres, "get", x="ES"))))
1670
+          say(paste0("All NAs in PEP for profile: ", colnames(genemat)[j]), "warning")
1636 1671
         x[[j]] <- gres
1637 1672
     }
1638 1673
     if(pbar) {
... ...
@@ -1656,13 +1691,15 @@ gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) {
1656 1691
 }
1657 1692
 
1658 1693
 
1659
-gsea <- function(S, ranks_list, check=FALSE, alternative = "two.sided")
1694
+gsea <- function(S, ranks_list, min_size, max_size, alternative = "two.sided")
1660 1695
 {
1661 1696
     S <- S[!(is.na(S))]
1662 1697
     S1 <- ranks_list[S]
1663 1698
     S2 <- ranks_list[-S]
1664 1699
 
1665
-    if(length(S1)<1 || length(S2)<1 || all(is.na(S1)) || all(is.na(S2)))
1700
+    if(length(S1) < min_size || length(S1) > max_size ||
1701
+       length(S2) < min_size ||
1702
+       all(is.na(S1)) || all(is.na(S2)))
1666 1703
         return(list(ES=NA, p=NA))
1667 1704
 
1668 1705
     ks <- ks.test.2(S1, S2, alternative=alternative, maxCombSize=10^10)
... ...
@@ -1692,8 +1729,8 @@ storePEPs <- function(rp, db_id, peps, rawmode_suffix,
1692 1729
         peps$ES <- cbind(curmat$ES, peps$ES[, newpeps, drop=FALSE])
1693 1730
         peps$PV <- cbind(curmat$PV, peps$PV[, newpeps, drop=FALSE])
1694 1731
     }
1695
-    
1696
-    if(!rawmode) {
1732
+
1733
+    if(!rawmode) {        
1697 1734
         say("Storing PEPs to the repository...")
1698 1735
     
1699 1736
         rp$put(peps, db_id,
... ...
@@ -1763,7 +1800,7 @@ ks.test.2 <- function(x, y, ...) {
1763 1800
 }
1764 1801
 
1765 1802
 
1766
-rankPEPsByCols <- function(peps, rankingset="all")
1803
+rankPEPsByCols.SPV <- function(peps, rankingset="all")
1767 1804
 {
1768 1805
     rankPEP <- function(PVs, ESs)
1769 1806
     {
... ...
@@ -1785,8 +1822,23 @@ rankPEPsByCols <- function(peps, rankingset="all")
1785 1822
     return(x)
1786 1823
 }
1787 1824
 
1825
+rankPEPsByCols.NES <- function(peps)
1826
+{
1827
+    ESs <- t(scale(t(peps$ES)))
1828
+    attr(ESs, "scaled:center") <- NULL
1829
+    attr(ESs, "scaled:scale") <- NULL
1830
+    x <- apply(-ESs, 2, rank, ties.method = "random", na.last="keep")
1831
+    return(x)
1832
+}
1833
+
1834
+rankPEPsByCols.ES <- function(peps)
1835
+{
1836
+    x <- apply(-peps$ES, 2, rank, ties.method = "random", na.last="keep")
1837
+    return(x)
1838
+}
1839
+
1788 1840
 
1789
-rankPEPsByRows <- function(peps)
1841
+rankPEPsByRows.ES <- function(peps)
1790 1842
 {
1791 1843
     ESs <- peps[["ES"]]
1792 1844
     x <- t(apply(-ESs, 1, rank, ties.method = "random", na.last="keep"))
... ...
@@ -1865,7 +1917,7 @@ checkGEPsFormat <- function(geps)
1865 1917
     
1866 1918
     if(any(mins != 1) || any(maxs != dims[1]) || any(not_unique))
1867 1919
         say(paste("GEP columns must be ranks. Check",
1868
-                  "that each column is made of numbers from 1",
1920
+                  "that each column is made of integer numbers from 1",
1869 1921
                   "to the number of rows."), "error")
1870 1922
         
1871 1923
 }
... ...
@@ -1952,13 +2004,10 @@ convertFromGSetClass <- function(gsets) {
1952 2004
 .makeCollectionIDs <- function(sets) {
1953 2005
     dbs <- sapply(sets, get, x="category")
1954 2006
     subdbs <- sapply(sets, get, x="subcategory")
1955
-    subdbs[subdbs==""] <- dbs[subdbs==""]
2007
+    w <- which(subdbs=="" | is.na(subdbs))
2008
+    if(length(w)>0)
2009
+      subdbs[w] <- dbs[w]
1956 2010
     db_ids <- paste(dbs, subdbs, sep="_")
1957 2011
     return(db_ids)
1958 2012
 }
1959 2013
 
1960
-.extractWorkingPEPs <- function(rp, coll, fgset, bgset) {
1961
-    ishdf5 <- "#rhdf5" %in% rp$tags(coll)
1962
-
1963
-    
1964
-}
... ...
@@ -5,7 +5,8 @@
5 5
 \title{Build PEPs from GEPs and stores them in the repository.}
6 6
 \usage{
7 7
 buildPEPs(rp, geps, parallel = FALSE, collections = "all",
8
-  replace_existing = FALSE, progress_bar = TRUE)
8
+  replace_existing = FALSE, progress_bar = TRUE, rawmode_id = NULL,
9
+  rawmode_outdir = file.path(rp$root(), "raw"))
9 10
 }
10 11
 \arguments{
11 12
 \item{rp}{A repository created by \code{\link{createRepository}}.}
... ...
@@ -32,6 +33,15 @@ added. Either ways, will throw a warning.}
32 33
 
33 34
 \item{progress_bar}{If set to TRUE (default) will show a progress
34 35
 bar updated after coversion of each column of \code{geps}.}
36
+
37
+\item{rawmode_id}{An integer to be appended to files produced in
38
+raw mode (see details). If set to NULL (default), raw mode is
39
+turned off.}
40
+
41
+\item{rawmode_outdir}{A charater vector specifying the destination
42
+path for files produced in raw mode (by the fault it is
43
+ROOT/raw, where ROOT is the root of the repository). Ignored if
44
+\code{rawmode_id} is NULL.}
35 45
 }
36 46
 \value{
37 47
 Nothing. The computed PEPs will be available in the
... ...
@@ -42,6 +52,18 @@ Given a matrix of ranked lists of genes (GEPs) and a \code{gep2pep}
42 52
 repository, converts GEPs to PEPs and stores the latter in the
43 53
 repository.
44 54
 }
55
+\details{
56
+By deault, output is written to the repository as new
57
+    items named using the collection name. However, it is possible
58
+    to avoid the repository and write the output to regular files
59
+    turning 'raw mode' on through the \code{rawmode_id} and
60
+    \code{rawmode_outdir} parameters. This is particuarly useful
61
+    when dealing with very large corpora of GEPs, and conversions
62
+    are split into independent jobs submitted to a scheduler. At
63
+    the end, the data will need to be reconstructed and put into
64
+    the repository using \code{importFromRawMode} in order to
65
+    perform \code{CondSEA} or \code{PathSEA} analysis.
66
+}
45 67
 \examples{
46 68
 db <- loadSamplePWS()
47 69
 db <- as.CategorizedCollection(db)
... ...
@@ -2,3 +2,4 @@ library(testthat)
2 2
 library(gep2pep)
3 3
 
4 4
 test_check("gep2pep")
5
+test_check("rawMode")
... ...
@@ -1,11 +1,12 @@
1 1
 
2 2
 
3
-## Workflow:
3
+## ## Workflow:
4 4
 ## library(GSEABase)
5 5
 ## library(devtools)
6 6
 ## library(testthat)
7 7
 ## load_all()
8 8
 
9
+loadPEPs <- gep2pep::.loadPEPs
9 10
 
10 11
 dbfolder <- file.path(tempdir(), "gep2pepDB")
11 12
 
... ...
@@ -77,7 +78,8 @@ test_that("new db creation", {
77 78
 
78 79
 context("creation of peps")
79 80
 
80
-suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE))
81
+suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE,
82
+                           min_size=3))
81 83
 
82 84
 test_that("build first PEPs", {
83 85
   expect_equal(length(rp$entries()), 8)
... ...
@@ -95,7 +97,7 @@ test_that("build first PEPs", {
95 97
   expect_equal(ncol(rp$get(expected_dbs[1])[[2]]), ncol(testgep))
96 98
   expect_equal(ncol(rp$get(expected_dbs[3])[[1]]), ncol(testgep))
97 99
   expect_equal(ncol(rp$get(expected_dbs[3])[[2]]), ncol(testgep))
98
-  expect_failure(expect_warning(suppressMessages(checkRepository(rp))))  
100
+  expect_failure(expect_warning(suppressMessages(checkRepository(rp))))
99 101
   expect_error(loadESmatrix(rp, "random name"))
100 102
   expect_equal(loadESmatrix(rp, "c3_TFT"), rp$get("c3_TFT")$ES)
101 103
   expect_error(loadPVmatrix(rp, "random name"))
... ...
@@ -103,60 +105,6 @@ test_that("build first PEPs", {
103 105
 })
104 106
 
105 107
 
106
-context("creation of RAW peps")
107
-
108
-suppressMessages(
109
-    buildPEPs(rp, testgep[,1:2], progress_bar=FALSE,
110
-              rawmode_id=1)
111
-)
112
-suppressMessages(
113
-    buildPEPs(rp, testgep[,3:5], progress_bar=FALSE,
114
-              rawmode_id=2)
115
-)
116
-
117
-outfiles1 <- paste0(getCollections(rp), "#1.RDS")
118
-outfiles2 <- paste0(getCollections(rp), "#2.RDS")
119
-outfiles <- c(outfiles1, outfiles2)
120
-outdir <- file.path(rp$root(), "raw")
121
-f1 <- readRDS(paste0(file.path(outdir, outfiles[1])))
122
-
123
-test_that("build hdf5 PEPs", {
124
-    expect_true(all(sapply(outfiles, `%in%`, list.files(outdir))))
125
-    expect_equal(f1$ES[,1], rp$get("c3_TFT")$ES[,1])
126
-    expect_equal(f1$PV[,1], rp$get("c3_TFT")$PV[,1])
127
-    expect_equal(f1$ES[,2], rp$get("c3_TFT")$ES[,2])
128
-    expect_equal(f1$PV[,2], rp$get("c3_TFT")$PV[,2])
129
-})
130
-
131
-
132
-colls <- getCollections(rp)
133
-
134
-oldpep2 <- rp$get(colls[2])
135
-rp$rm(tags="pep", force=T)
136
-importFromRawMode(rp)
137
-
138
-pep2 <- gep2pep::.loadPEPs(rp, colls[2])
139
-
140
-rownames(pep2$ES) <- rownames(pep2$PV) <- rp$get(colls[2])
141
-colnames(pep2$ES) <- colnames(pep2$PV) <- rp$get(colls[2])
142
-
143
-## rownames(pep2$ES)
144
-##  [1] "M7785"  "M6394"  "M18759" "M10635" "M14709" "M4820"  "M7677"  "M11751"
145
-##  [9] "M10105" "M5012" 
146
-## rownames(oldpep2$ES)
147
-##    M7785    M6394   M18759   M10635   M14709    M4820    M7677   M11751 
148
-##  "M7785"  "M6394" "M18759" "M10635" "M14709"  "M4820"  "M7677" "M11751" 
149
-##   M10105    M5012 
150
-## "M10105"  "M5012"     
151
-
152
-test_that("check hdf5 PEPss", {
153
-    expect_true(all(oldpep2$ES==pep2$ES))
154
-    expect_true(all(oldpep2$PV==pep2$PV))
155
-    expect_true(all(rownames(oldpep2$ES) == rownames(pep2$ES)))
156
-    expect_true(all(rownames(oldpep2$PV) == rownames(pep2$PV)))
157
-    expect_true(all(colnames(oldpep2$ES) == colnames(pep2$ES)))
158
-    expect_true(all(colnames(oldpep2$PV) == colnames(pep2$PV)))
159
-})
160 108
 
161 109
 res <- list()
162 110
 for(i in 1:3) {
... ...
@@ -166,29 +114,30 @@ for(i in 1:3) {
166 114
   id <- testpws_old[[testi]]$id
167 115
   tomatch <- intersect(rownames(testgep), set)
168 116
   inset <- testgep[match(tomatch, rownames(testgep)), testj]
169
-  ks <- ks.test.2(inset, (1:nrow(testgep))[-inset], maxCombSize=10^10)
117
+  if(length(tomatch) >= 3) {
118
+      ks <- ks.test.2(inset, (1:nrow(testgep))[-inset], maxCombSize=10^10)
119
+  } else ks <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
170 120
   dbi <- dbs[testi]
171 121
   res[[i]] <- list(id=id, testj=testj, ks=ks, dbi=dbi)
172 122
 }
173
-
174 123
 test_that("KS statistics", {
175 124
   i <- 1
176 125
   id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi
177
-  expect_equal(rp$get(dbi)$ES[id, testj], ks$ES)
178
-  expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value)
126
+  expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES)
127
+  expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value)
179 128
   i <- 2
180 129
   id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi
181
-  expect_equal(rp$get(dbi)$ES[id, testj], ks$ES)
182
-  expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value)
130
+  expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES)
131
+  expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value)
183 132
   i <- 3
184 133
   id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi
185
-  expect_equal(rp$get(dbi)$ES[id, testj], ks$ES)
186
-  expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value)
134
+  expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES)
135
+  expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value)
187 136
 })
188 137
 
189 138
 context("adding existing peps")
190 139
 
191
-oldTFT <- rp$get("c3_TFT")
140
+oldTFT <- loadPEPs(rp, "c3_TFT")
192 141
 test_that("Adding PEPs", {
193 142
     expect_warning(
194 143
         suppressMessages(buildPEPs(rp, testgep[, 1:3], progress_bar=FALSE))
... ...
@@ -196,7 +145,7 @@ test_that("Adding PEPs", {
196 145
     expect_failure(expect_warning(suppressMessages(checkRepository(rp))))    
197 146
 })
198 147
 
199
-untouchedTFT <- rp$get("c3_TFT")
148
+untouchedTFT <- oldTFT
200 149
 
201 150
 subs <- c(2,4,5)
202 151
 smallTFT <- list(ES=oldTFT$ES[, subs],
... ...
@@ -205,7 +154,7 @@ smallTFT <- list(ES=oldTFT$ES[, subs],
205 154
 ## the "perturbagens" item
206 155
 rp$set("c3_TFT", smallTFT)
207 156
 
208
-rebuiltTFT <- rp$get("c3_TFT")
157
+rebuiltTFT <- loadPEPs(rp, "c3_TFT")
209 158
 test_that("Adding PEPs", {
210 159
     expect_warning(
211 160
         suppressMessages(buildPEPs(rp, testgep[, 1:3], progress_bar=FALSE))
... ...
@@ -216,7 +165,6 @@ test_that("Adding PEPs", {
216 165
     expect_warning(suppressMessages(checkRepository(rp)))
217 166
 })
218 167
 
219
-
220 168
 rp$set("c3_TFT", oldTFT)
221 169
 
222 170
 
... ...
@@ -229,8 +177,8 @@ for(i in 1:ncol(testgep))
229 177
     )
230 178
 
231 179
 test_that("adding one by one", {
232
-    expect_true(identical(rp2$get("c3_TFT"), rp$get("c3_TFT")))
233
-    expect_failure(expect_warning(suppressMessages(checkRepository(rp2))))    
180
+    ##expect_true(identical(rp2$get("c3_TFT"), rp$get("c3_TFT")))
181
+    expect_failure(expect_warning(suppressMessages(checkRepository(rp2))))
234 182
 })
235 183
 
236 184
 
... ...
@@ -242,20 +190,22 @@ peps3 <- rp$get(expected_dbs[3])
242 190
 
243 191
 es1 <- peps1$ES
244 192
 es3 <- peps3$ES
245
-RowRanked1 <- rankPEPsByRows(peps1)
246
-RowRanked3 <- rankPEPsByRows(peps3)
193
+RowRanked1 <- rankPEPsByRows.ES(peps1)
194
+nas1 <- which(is.na(RowRanked1[,1]))
195
+RowRanked3 <- rankPEPsByRows.ES(peps3)
196
+nas3 <- which(is.na(RowRanked3[,3]))
247 197
 
248 198
 test_that("Row ranking", {
249
-    expect_true(all(apply(RowRanked1, 1, setequal, 1:5)))
250
-    expect_true(all(apply(RowRanked3, 1, setequal, 1:5)))
199
+    expect_true(all(apply(RowRanked1[-nas1,], 1, setequal, 1:5)))
200
+    expect_true(all(apply(RowRanked3[-nas3,], 1, setequal, 1:5)))
251 201
     expect_equal(RowRanked1[1,1], 5)
252 202
     expect_equal(RowRanked1[4,3], 1)
253 203
     expect_equal(RowRanked3[5,4], 5)
254 204
     expect_equal(RowRanked3[8,3], 1)
255 205
 })
256 206
 
257
-ColRanked1 <- rankPEPsByCols(peps1)
258
-ColRanked3 <- rankPEPsByCols(peps3)
207
+ColRanked1 <- rankPEPsByCols.SPV(peps1)
208
+ColRanked3 <- rankPEPsByCols.SPV(peps3)
259 209
 
260 210
 randj <- sample(ncol(ColRanked3),1)
261 211
 PVs <- peps1$PV[,randj]
... ...
@@ -265,30 +215,44 @@ if(any(ESs<0)) {
265 215
     lastid <- which.min(PVs)
266 216
 } else lastid <- which.max(PVs)
267 217
 
268
-test_that("Column ranking", {
269
-    expect_true(all(apply(ColRanked1, 2, setequal, 1:10)))
270
-    expect_true(all(apply(ColRanked3, 2, setequal, 1:10)))
218
+test_that("Column ranking with SPV", {
219
+    expect_true(all(apply(ColRanked1[-nas1,], 2, setequal, 1:9)))
220
+    expect_true(all(apply(ColRanked3[-nas3,], 2, setequal, 1:9)))
271 221
     expect_equal(ColRanked1[2,1], 1)
272
-    expect_equal(ColRanked1[1,1], 10)
222
+    expect_equal(ColRanked1[1,1], 9)
273 223
     expect_equal(ColRanked3[8,3], 1)
274
-    expect_equal(ColRanked3[10,3], 10)
275
-    expect_equal(ColRanked3[4,5], 10)
276
-    expect_equal(ColRanked1[lastid, randj], 10)    
224
+    expect_equal(ColRanked3[10,3], 9)
225
+    expect_equal(ColRanked3[4,5], 9)
226
+    expect_equal(ColRanked1[lastid, randj], 9)
277 227
 })
278 228
 
279 229
 
280
-context("CondSEA")
230
+ColRanked3 <- rankPEPsByCols.NES(peps3)
231
+manual <- t(scale(t(peps3$ES)))
232
+stopifnot(all(apply(manual[-nas3,],1,sd)-1<10^-15))
233
+manualR <- apply(-manual, 2, rank, na.last="keep")
281 234
 
235
+test_that("Column ranking with NES", {
236
+    expect_true(all(ColRanked3[-nas3,]==manualR[-nas3,]))
237
+    expect_true(all(is.na(manualR[nas3,])))
238
+})
239
+
240
+
241
+context("CondSEA")
282 242
 pgset <- c("(+)_chelidonine",  "(+/_)_catechin")
283 243
 res <- suppressMessages(CondSEA(rp, pgset))
284 244
 randi <- sample(1:length(testpws), 1)
285 245
 pwsid <- testpws_old[[randi]]$id
286 246
 randDB <- dbs[randi]
287
-ranked <- rankPEPsByRows(rp$get(randDB))
247
+ranked <- rankPEPsByRows.ES(rp$get(randDB))
288 248
 inset <- ranked[pwsid, pgset]
289 249
 outset <- ranked[pwsid, setdiff(colnames(ranked), pgset)]
290
-ks <- ks.test.2(inset, outset)
291
-
250
+if(length(inset[!is.na(inset)])>0 &&
251
+   length(outset[!is.na(outset)])>0) {
252
+    ks <- ks.test.2(inset, outset, maxCombSize=10^10)
253
+} else {
254
+    ks <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
255
+}
292 256
 test_that("CondSEA", {
293 257
     expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]])   
294 258
     expect_equal(getResults(res, "c3_TFT"), res$CondSEA[["c3_TFT"]])
... ...
@@ -303,33 +267,42 @@ context("PathSEA")
303 267
 
304 268
 db1 <- expected_dbs[1]
305 269
 db3 <- expected_dbs[3]
306
-
307 270
 pws1 <- sapply(testpws[makeCollectionIDs(testpws)==db1][c(2,5,6,9)], setName)
308 271
 pws3 <- sapply(testpws[makeCollectionIDs(testpws)==db3][c(1,3,10)], setName)
309 272
 res <- suppressMessages(PathSEA(rp, testpws[c(pws1, pws3)]))
310 273
 setids1 <- sapply(testpws[pws1], setIdentifier)
311 274
 setids3 <- sapply(testpws[pws3], setIdentifier)
312
-
313 275
 randj1 <- sample(1:ncol(testgep), 1)
314
-ranked <- rankPEPsByCols(rp$get(db1))
276
+ranked <- rankPEPsByCols.SPV(rp$get(db1))
315 277
 peps <- rp$get(db1)
316 278
 inset <- ranked[setids1, randj1]
317 279
 outset <- ranked[setdiff(rownames(ranked), setids1), randj1]
318
-ks1 <- ks.test.2(inset, outset)
319
-
280
+inset <- inset[!is.na(inset)]
281
+outset <- outset[!is.na(outset)]
282
+if(length(inset)>0 &&
283
+   length(outset)>0) {
284
+    ks1 <- ks.test.2(inset, outset, maxCombSize=10^10)
285
+} else {
286
+    ks1 <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
287
+}
320 288
 randj3 <- sample(1:ncol(testgep), 1)
321
-ranked <- rankPEPsByCols(rp$get(db3))
289
+ranked <- rankPEPsByCols.SPV(rp$get(db3))
322 290
 peps <- rp$get(db3)
323 291
 inset <- ranked[setids3, randj3]
324 292
 outset <- ranked[setdiff(rownames(ranked), setids3), randj3]
325
-ks3 <- ks.test.2(inset, outset)
326
-
293
+inset <- inset[!is.na(inset)]
294
+outset <- outset[!is.na(outset)]
295
+if(length(inset)>0 &&
296
+   length(outset)>0) {
297
+    ks3 <- ks.test.2(inset, outset, maxCombSize=10^10)
298
+} else {
299
+    ks3 <- list(ES=as.numeric(NA), p.value=as.numeric(NA))
300
+}
327 301
 name1 <- colnames(testgep)[randj1]
328 302
 name3 <- colnames(testgep)[randj3]
329
-
330 303
 test_that("PathSEA", {
331
-    expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]])   
332
-    expect_equal(getResults(res, "c3_TFT"), res$PathSEA[["c3_TFT"]])   
304
+    expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]])
305
+    expect_equal(getResults(res, "c3_TFT"), res$PathSEA[["c3_TFT"]])
333 306
     expect_equal(unname(res[["PathSEA"]][[db1]][name1, "ES"]),
334 307
                  ks1$ES)
335 308
     expect_equal(unname(res[["PathSEA"]][[db1]][name1, "PV"]),
... ...
@@ -341,7 +314,6 @@ test_that("PathSEA", {
341 314
 })
342 315
 
343 316
 
344
-
345 317
 ## A gene that is found in at least 3 pathways:
346 318
 gene <- intersect(intersect(geneIds(testpws[[3]]), geneIds(testpws[[4]])),
347 319
                   geneIds(testpws[[7]]))[1]
348 320
new file mode 100644
... ...
@@ -0,0 +1,86 @@
1
+
2
+
3
+## ## Workflow:
4
+## library(GSEABase)
5
+## library(devtools)
6
+## library(testthat)
7
+## load_all()
8
+
9
+loadPEPs <- gep2pep::.loadPEPs
10
+
11
+dbfolder <- file.path(tempdir(), "gep2pepDB")
12
+
13
+clear_test_repo <- function(suffix=NULL) {
14
+    folder <- paste0(dbfolder, suffix)
15
+    if(file.exists(folder))
16
+        unlink(folder, T)
17
+}
18
+
19
+create_test_repo <- function(suffix=NULL) {
20
+    folder <- paste0(dbfolder, suffix)
21
+    clear_test_repo(suffix)
22
+    return(
23
+        suppressMessages(
24
+            createRepository(folder, testpws)
25
+            )
26
+        )    
27
+}
28
+
29
+testgep <- loadSampleGEP()
30
+testpws <- as.CategorizedCollection(
31
+    loadSamplePWS()
32
+)
33
+testpws_old <- gep2pep:::convertFromGSetClass(testpws)
34
+
35
+rp <- create_test_repo()
36
+dbs <- makeCollectionIDs(testpws)
37
+expected_dbs <- c("c3_TFT", "c3_MIR", "c4_CGN")
38
+
39
+
40
+suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE))
41
+
42
+context("creation of RAW peps")
43
+
44
+suppressMessages(
45
+    buildPEPs(rp, testgep[,1:2], progress_bar=FALSE,
46
+              rawmode_id=1)
47
+)
48
+suppressMessages(
49
+    buildPEPs(rp, testgep[,3:5], progress_bar=FALSE,
50
+              rawmode_id=2)
51
+)
52
+
53
+outfiles1 <- paste0(getCollections(rp), "#1.RDS")
54
+outfiles2 <- paste0(getCollections(rp), "#2.RDS")
55
+outfiles <- c(outfiles1, outfiles2)
56
+outdir <- file.path(rp$root(), "raw")
57
+f1 <- readRDS(paste0(file.path(outdir, outfiles[1])))
58
+
59
+test_that("build hdf5 PEPs", {
60
+    expect_true(all(sapply(outfiles, `%in%`, list.files(outdir))))
61
+    expect_equal(f1$ES[,1], rp$get("c3_TFT")$ES[,1])
62
+    expect_equal(f1$PV[,1], rp$get("c3_TFT")$PV[,1])
63
+    expect_equal(f1$ES[,2], rp$get("c3_TFT")$ES[,2])
64
+    expect_equal(f1$PV[,2], rp$get("c3_TFT")$PV[,2])
65
+})
66
+
67
+
68
+colls <- getCollections(rp)
69
+
70
+oldpep2 <- rp$get(colls[2])
71
+rp$rm(tags="pep", force=T)
72
+importFromRawMode(rp)
73
+
74
+pep2 <- loadPEPs(rp, colls[2])
75
+w <- is.na(pep2$ES[,1])
76
+    
77
+test_that("check hdf5 PEPss", {
78
+    expect_true(all(oldpep2$ES[!w,]==pep2$ES[!w,]))
79
+    expect_true(all(oldpep2$PV[!w,]==pep2$PV[!w,]))
80
+    expect_true(all(is.na(oldpep2$PV[w,])))
81
+    expect_true(all(rownames(oldpep2$ES)==rownames(pep2$ES)))
82
+    expect_true(all(rownames(oldpep2$PV)==rownames(pep2$PV)))
83
+    expect_true(all(colnames(oldpep2$ES)==colnames(pep2$ES)))
84
+    expect_true(all(colnames(oldpep2$PV)==colnames(pep2$PV)))
85
+})
86
+