... | ... |
@@ -1,7 +1,7 @@ |
1 | 1 |
Package: gep2pep |
2 | 2 |
Type: Package |
3 | 3 |
Title: Creation and Analysis of Pathway Expression Profiles (PEPs) |
4 |
-Version: 0.99.2.3 |
|
4 |
+Version: 0.99.2.6 |
|
5 | 5 |
Date: 2017-09-11 |
6 | 6 |
Author: Francesco Napolitano <franapoli@gmail.com> |
7 | 7 |
Maintainer: Francesco Napolitano <franapoli@gmail.com> |
... | ... |
@@ -342,7 +342,7 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"), |
342 | 342 |
|
343 | 343 |
say(paste0("Working on collection: ", dbi)) |
344 | 344 |
|
345 |
- say(paste0("Creatiing a repository entry.")) |
|
345 |
+ say(paste0("Creating a repository entry.")) |
|
346 | 346 |
fl <- tempfile() |
347 | 347 |
h5createFile(fl) |
348 | 348 |
rp$put(fl, dbi, asattach=T, tags=c("pep", "#hdf5")) |
... | ... |
@@ -352,13 +352,13 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"), |
352 | 352 |
x <- readRDS(file.path(path, fname))$ES |
353 | 353 |
Nrow <- nrow(x) |
354 | 354 |
say(paste0("Creating 2 HDF5 dataset of size: ", Nrow, "x", Ncol)) |
355 |
- ## h5createDataset(fl, "ES-PV", c(Nrow*2, Ncol), chunk=c(Nrow*2, Nchunk)) |
|
355 |
+ |
|
356 | 356 |
h5createDataset(fl, "ES", c(Nrow, Ncol), chunk=c(Nrow, Nchunk)) |
357 | 357 |
h5createDataset(fl, "PV", c(Nrow, Ncol), chunk=c(Nrow, Nchunk)) |
358 | 358 |
h5createDataset(fl, "rownames", Nrow, storage.mode="character", size=256, |
359 | 359 |
chunk=Nrow) |
360 | 360 |
h5createDataset(fl, "colnames", Ncol, storage.mode="character", size=256, |
361 |
- chunk=Ncol) |
|
361 |
+ chunk=Nchunk) |
|
362 | 362 |
h5write(rownames(x), fl, "rownames") |
363 | 363 |
|
364 | 364 |
say("Adding chunks...") |
... | ... |
@@ -370,8 +370,7 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"), |
370 | 370 |
x <- readRDS(ifile) |
371 | 371 |
ifsize <- utils:::format.object_size(file.size(ifile), "auto") |
372 | 372 |
startCol <- (j-1)*Nchunk+1 |
373 |
- ## h5write(rbind(x$ES, x$PV), fl, "ES-PV", start=c(1,startCol), |
|
374 |
- ## createnewfile=F) |
|
373 |
+ |
|
375 | 374 |
h5write(x$ES, fl, "ES", start=c(1,startCol), |
376 | 375 |
createnewfile=F) |
377 | 376 |
h5write(x$PV, fl, "PV", start=c(1,startCol), |
... | ... |
@@ -429,12 +428,19 @@ importFromRawMode <- function(rp, path=file.path(rp$root(), "raw"), |
429 | 428 |
#' ## removing temporary repository |
430 | 429 |
#' unlink(repo_path, TRUE) |
431 | 430 |
#' @export |
432 |
-importMSigDB.xml <- function(fname) { |
|
431 |
+importMSigDB.xml <- function(fname, organism="Homo Sapiens") { |
|
433 | 432 |
|
434 | 433 |
say("Loading gene sets...") |
435 | 434 |
|
436 | 435 |
result = tryCatch({ |
437 | 436 |
sets <- getBroadSets(fname) |
437 |
+ say(paste0("Loaded ", length(sets), " sets.")) |
|
438 |
+ orgs <- sapply(sets, function(s) attributes(s)$organism) |
|
439 |
+ if(organism != "all") { |
|
440 |
+ w <- tolower(orgs) == tolower(organism) |
|
441 |
+ sets <- sets[w] |
|
442 |
+ say(paste0("Selected ", length(sets), " sets for: ", organism)) |
|
443 |
+ } |
|
438 | 444 |
say("Converting gene sets...") |
439 | 445 |
as.CategorizedCollection(sets) |
440 | 446 |
}, error = function(e) { |
... | ... |
@@ -466,18 +472,22 @@ importMSigDB.xml <- function(fname) { |
466 | 472 |
|
467 | 473 |
say("Converting gene sets...") |
468 | 474 |
gs <- list() |
475 |
+ k <- 1 |
|
469 | 476 |
for(i in seq_len(nrow(msigDB))) { |
470 |
- gs[[i]] <- GeneSet(strsplit(msigDB$set[i], ",")[[1]], |
|
471 |
- shortDescription = msigDB$desc[i], |
|
472 |
- longDescription = msigDB$desc_full[i], |
|
473 |
- setName = msigDB$name[i], |
|
474 |
- setIdentifier = msigDB$id[i], |
|
475 |
- organism = msigDB$organism[i], |
|
476 |
- collectionType = CategorizedCollection( |
|
477 |
- category=msigDB$category[i], |
|
478 |
- subCategory=msigDB$subcategory[i] |
|
479 |
- )) |
|
477 |
+ if(tolower(msigDB[[i]]$organism) == organism) { |
|
478 |
+ gs[[i]] <- GeneSet(strsplit(msigDB$set[i], ",")[[1]], |
|
479 |
+ shortDescription = msigDB$desc[i], |
|
480 |
+ longDescription = msigDB$desc_full[i], |
|
481 |
+ setName = msigDB$name[i], |
|
482 |
+ setIdentifier = msigDB$id[i], |
|
483 |
+ organism = msigDB$organism[i], |
|
484 |
+ collectionType = CategorizedCollection( |
|
485 |
+ category=msigDB$category[i], |
|
486 |
+ subCategory=msigDB$subcategory[i] |
|
487 |
+ )) |
|
488 |
+ k <- k+1 |
|
480 | 489 |
} |
490 |
+ } |
|
481 | 491 |
GeneSetCollection(gs) |
482 | 492 |
}) |
483 | 493 |
|
... | ... |
@@ -929,7 +939,9 @@ makeCollectionIDs <- function(sets) { |
929 | 939 |
|
930 | 940 |
dbs <- sapply(sets, get, x="category") |
931 | 941 |
subdbs <- sapply(sets, get, x="subcategory") |
932 |
- subdbs[subdbs==""] <- dbs[subdbs==""] |
|
942 |
+ w <- which(subdbs=="" | is.na(subdbs)) |
|
943 |
+ if(length(w)>0) |
|
944 |
+ subdbs[w] <- dbs[w] |
|
933 | 945 |
db_ids <- paste(dbs, subdbs, sep="_") |
934 | 946 |
return(db_ids) |
935 | 947 |
} |
... | ... |
@@ -1018,7 +1030,8 @@ makeCollectionIDs <- function(sets) { |
1018 | 1030 |
#' unlink(repo_path, TRUE) |
1019 | 1031 |
#' |
1020 | 1032 |
#' @export |
1021 |
-buildPEPs <- function(rp, geps, parallel=FALSE, collections="all", |
|
1033 |
+buildPEPs <- function(rp, geps, min_size=3, max_size=500, |
|
1034 |
+ parallel=FALSE, collections="all", |
|
1022 | 1035 |
replace_existing=FALSE, progress_bar=TRUE, |
1023 | 1036 |
rawmode_id=NULL, |
1024 | 1037 |
rawmode_outdir=file.path(rp$root(), "raw")) |
... | ... |
@@ -1046,15 +1059,19 @@ buildPEPs <- function(rp, geps, parallel=FALSE, collections="all", |
1046 | 1059 |
|
1047 | 1060 |
if(length(oldpeps > 0)) { |
1048 | 1061 |
if(rawmode) { |
1049 |
- say(paste0("Existing PEPs found, ", |
|
1062 |
+ say(paste0(length(oldpeps), |
|
1063 |
+ " existing PEPs found, ", |
|
1050 | 1064 |
"but this will be ignored in rawmode: ", |
1051 | 1065 |
paste(oldpeps, collapse=", "))) |
1052 | 1066 |
newpeps <- colnames(geps) |
1053 | 1067 |
} else { |
1054 |
- msg <- paste0("Existing PEPs will be replaced: ", |
|
1068 |
+ msg <- paste0(length(oldpeps), |
|
1069 |
+ " existing PEPs will be skipped: ", |
|
1055 | 1070 |
paste(oldpeps, collapse=", ")) |
1056 |
- if(!replace_existing) |
|
1057 |
- msg <- gsub("replaced", "skipped", msg) |
|
1071 |
+ if(replace_existing) { |
|
1072 |
+ msg <- gsub("skipped", "replaced", msg) |
|
1073 |
+ newpeps <- colnames(geps) |
|
1074 |
+ } |
|
1058 | 1075 |
say(msg, type="warning") |
1059 | 1076 |
} |
1060 | 1077 |
} |
... | ... |
@@ -1062,7 +1079,8 @@ buildPEPs <- function(rp, geps, parallel=FALSE, collections="all", |
1062 | 1079 |
if(length(newpeps) > 0) { |
1063 | 1080 |
gepsi <- geps[, newpeps, drop=FALSE] |
1064 | 1081 |
thisdb <- .loadCollection(rp, dbs[i]) |
1065 |
- peps <- gep2pep(gepsi, thisdb, parallel, progress_bar) |
|
1082 |
+ peps <- gep2pep(gepsi, thisdb, min_size, max_size, |
|
1083 |
+ parallel, progress_bar) |
|
1066 | 1084 |
storePEPs(rp, dbs[i], peps, rawmode_id, |
1067 | 1085 |
rawmode_outdir) |
1068 | 1086 |
} |
... | ... |
@@ -1229,11 +1247,14 @@ getDetails <- function(analysis, collection) |
1229 | 1247 |
if(missing(subset)) |
1230 | 1248 |
subset <- NULL |
1231 | 1249 |
fname <- rp$get(coll) |
1232 |
- data <- h5read(fname, "ES-PV", index=list(NULL, subset)) |
|
1233 | 1250 |
peps <- list( |
1234 |
- ES = data[1:(nrow(data)/2),], |
|
1235 |
- PV = data[(nrow(data)/2 + 1):nrow(data),] |
|
1236 |
- ) |
|
1251 |
+ ES = h5read(fname, "ES", index=list(NULL, subset)), |
|
1252 |
+ PV = h5read(fname, "PV", index=list(NULL, subset)) |
|
1253 |
+ ) |
|
1254 |
+ rownames(peps$ES) <- rownames(peps$PV) <- |
|
1255 |
+ h5read(fname, "rownames") |
|
1256 |
+ colnames(peps$ES) <- colnames(peps$PV) <- |
|
1257 |
+ h5read(fname, "colnames", index=list(subset)) |
|
1237 | 1258 |
} else { |
1238 | 1259 |
peps <- list( |
1239 | 1260 |
ES = rp$get(coll)$ES[, subset, drop=F], |
... | ... |
@@ -1307,7 +1328,7 @@ getDetails <- function(analysis, collection) |
1307 | 1328 |
#' |
1308 | 1329 |
#' @export |
1309 | 1330 |
CondSEA <- function(rp_peps, pgset, bgset="all", collections="all", |
1310 |
- details=TRUE) |
|
1331 |
+ details=TRUE, rankingFun = rankPEPsByRows.ES) |
|
1311 | 1332 |
{ |
1312 | 1333 |
dbs <- collections |
1313 | 1334 |
if(length(dbs) == 1 && dbs=="all") { |
... | ... |
@@ -1326,7 +1347,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all", |
1326 | 1347 |
for(i in seq_along(dbs)) { |
1327 | 1348 |
say(paste0("Working on collection: ", dbs[i])) |
1328 | 1349 |
|
1329 |
- allperts <- .loadPerts(rp, dbs[i]) |
|
1350 |
+ allperts <- .loadPerts(rp_peps, dbs[i]) |
|
1330 | 1351 |
|
1331 | 1352 |
if(length(bgset) == 1 && bgset=="all") |
1332 | 1353 |
bgset <- allperts |
... | ... |
@@ -1346,7 +1367,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all", |
1346 | 1367 |
|
1347 | 1368 |
peps <- .loadPEPs(rp_peps, dbs[i], rankingset) |
1348 | 1369 |
say(paste0("Row-ranking collection")) |
1349 |
- ranked <- rankPEPsByRows(peps) |
|
1370 |
+ ranked <- rankingFun(peps) |
|
1350 | 1371 |
say(paste0("Computing enrichments")) |
1351 | 1372 |
|
1352 | 1373 |
ks <- apply(ranked, 1, function(row) { |
... | ... |
@@ -1453,7 +1474,7 @@ CondSEA <- function(rp_peps, pgset, bgset="all", collections="all", |
1453 | 1474 |
#' |
1454 | 1475 |
#' @export |
1455 | 1476 |
PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all", |
1456 |
- details=TRUE) |
|
1477 |
+ details=TRUE, rankingFun=rankPEPsByCols.SPV) |
|
1457 | 1478 |
{ |
1458 | 1479 |
checkSets(rp_peps, pathways) |
1459 | 1480 |
|
... | ... |
@@ -1461,10 +1482,6 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all", |
1461 | 1482 |
|
1462 | 1483 |
pathways <- pwList2pwStruct(pathways) |
1463 | 1484 |
|
1464 |
- for(i in seq_along(pathways)) |
|
1465 |
- if(!rp_peps$has(names(pathways)[i])) |
|
1466 |
- say("Cold not find PEPs: ", "error", names(pathways)[i]) |
|
1467 |
- |
|
1468 | 1485 |
if(length(bgsets)==1 && bgsets != "all") { |
1469 | 1486 |
checkSets(bgsets) |
1470 | 1487 |
bgsets <- pwList2pwStruct(bgsets) |
... | ... |
@@ -1481,7 +1498,13 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all", |
1481 | 1498 |
say("The following collections are not in the repository:", |
1482 | 1499 |
"error", offcols) |
1483 | 1500 |
} else collections <- getCollections(rp_peps) |
1484 |
- |
|
1501 |
+ |
|
1502 |
+ for(i in seq_along(pathways)) { |
|
1503 |
+ dbi <- names(pathways)[i] |
|
1504 |
+ if(dbi %in% collections && !rp_peps$has(dbi)) |
|
1505 |
+ say("Could not find PEPs: ", "error", names(pathways)[i]) |
|
1506 |
+ } |
|
1507 |
+ |
|
1485 | 1508 |
collections <- intersect(names(pathways), collections) |
1486 | 1509 |
|
1487 | 1510 |
if(length(setdiff(names(pathways), collections)>1)) { |
... | ... |
@@ -1509,18 +1532,27 @@ PathSEA <- function(rp_peps, pathways, bgsets="all", collections="all", |
1509 | 1532 |
say("Common pathway sets removed from bgset") |
1510 | 1533 |
} |
1511 | 1534 |
rankingset <- c(gmd, bgset) |
1512 |
- peps <- .loadPEPs(rp_peps$get, collections[i]) |
|
1535 |
+ peps <- .loadPEPs(rp_peps, collections[i]) |
|
1513 | 1536 |
notok <- rankingset[rankingset %in% rownames(peps)] |
1514 | 1537 |
if(length(notok)>0) |
1515 | 1538 |
say(paste0("Pathway set ids not found in ", collections[i], ": ", |
1516 | 1539 |
paste(notok, collapse=", ")), "error") |
1517 | 1540 |
|
1518 | 1541 |
say(paste0("Column-ranking collection")) |
1519 |
- ranked <- rankPEPsByCols(peps, rankingset) |
|
1542 |
+ ranked <- rankingFun(peps, rankingset) |
|
1520 | 1543 |
say(paste0("Computing enrichments")) |
1521 | 1544 |
|
1522 |
- ks <- apply(ranked, 2, function(col) ks.test.2(col[gmd], col[bgset])) |
|
1523 |
- |
|
1545 |
+ ks <- apply(ranked, 2, function(col) { |
|
1546 |
+ inset <- col[gmd] |
|
1547 |
+ inset <- inset[!is.na(inset)] |
|
1548 |
+ outset <- col[bgset] |
|
1549 |
+ outset <- outset[!is.na(outset)] |
|
1550 |
+ if(length(inset)>1 && length(outset)>1) { |
|
1551 |
+ res <- ks.test.2(inset, outset, maxCombSize=10^10) |
|
1552 |
+ } else res <- list(ES=NA, p.value=NA) |
|
1553 |
+ res |
|
1554 |
+ }) |
|
1555 |
+ |
|
1524 | 1556 |
PVs <- sapply(ks, get, x="p.value") |
1525 | 1557 |
sorter <- order(PVs) |
1526 | 1558 |
|
... | ... |
@@ -1607,8 +1639,9 @@ gene2pathways <- function(rp, gene) |
1607 | 1639 |
## } |
1608 | 1640 |
## } |
1609 | 1641 |
|
1610 |
-gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) { |
|
1611 |
- |
|
1642 |
+gep2pep <- function(geps, sets, min_size, max_size, parallel=FALSE, |
|
1643 |
+ pbar=TRUE) { |
|
1644 |
+ |
|
1612 | 1645 |
pathw <- sets |
1613 | 1646 |
genemat <- geps |
1614 | 1647 |
genes <- rownames(genemat) |
... | ... |
@@ -1631,8 +1664,10 @@ gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) { |
1631 | 1664 |
{ |
1632 | 1665 |
where <- match(set, genes) |
1633 | 1666 |
where <- where[!is.na(where)] |
1634 |
- gsea(where, genematj, FALSE) |
|
1667 |
+ gsea(where, genematj, min_size, max_size) |
|
1635 | 1668 |
} |
1669 |
+ if(all(is.na(sapply(gres, "get", x="ES")))) |
|
1670 |
+ say(paste0("All NAs in PEP for profile: ", colnames(genemat)[j]), "warning") |
|
1636 | 1671 |
x[[j]] <- gres |
1637 | 1672 |
} |
1638 | 1673 |
if(pbar) { |
... | ... |
@@ -1656,13 +1691,15 @@ gep2pep <- function(geps, sets, parallel=FALSE, pbar=TRUE) { |
1656 | 1691 |
} |
1657 | 1692 |
|
1658 | 1693 |
|
1659 |
-gsea <- function(S, ranks_list, check=FALSE, alternative = "two.sided") |
|
1694 |
+gsea <- function(S, ranks_list, min_size, max_size, alternative = "two.sided") |
|
1660 | 1695 |
{ |
1661 | 1696 |
S <- S[!(is.na(S))] |
1662 | 1697 |
S1 <- ranks_list[S] |
1663 | 1698 |
S2 <- ranks_list[-S] |
1664 | 1699 |
|
1665 |
- if(length(S1)<1 || length(S2)<1 || all(is.na(S1)) || all(is.na(S2))) |
|
1700 |
+ if(length(S1) < min_size || length(S1) > max_size || |
|
1701 |
+ length(S2) < min_size || |
|
1702 |
+ all(is.na(S1)) || all(is.na(S2))) |
|
1666 | 1703 |
return(list(ES=NA, p=NA)) |
1667 | 1704 |
|
1668 | 1705 |
ks <- ks.test.2(S1, S2, alternative=alternative, maxCombSize=10^10) |
... | ... |
@@ -1692,8 +1729,8 @@ storePEPs <- function(rp, db_id, peps, rawmode_suffix, |
1692 | 1729 |
peps$ES <- cbind(curmat$ES, peps$ES[, newpeps, drop=FALSE]) |
1693 | 1730 |
peps$PV <- cbind(curmat$PV, peps$PV[, newpeps, drop=FALSE]) |
1694 | 1731 |
} |
1695 |
- |
|
1696 |
- if(!rawmode) { |
|
1732 |
+ |
|
1733 |
+ if(!rawmode) { |
|
1697 | 1734 |
say("Storing PEPs to the repository...") |
1698 | 1735 |
|
1699 | 1736 |
rp$put(peps, db_id, |
... | ... |
@@ -1763,7 +1800,7 @@ ks.test.2 <- function(x, y, ...) { |
1763 | 1800 |
} |
1764 | 1801 |
|
1765 | 1802 |
|
1766 |
-rankPEPsByCols <- function(peps, rankingset="all") |
|
1803 |
+rankPEPsByCols.SPV <- function(peps, rankingset="all") |
|
1767 | 1804 |
{ |
1768 | 1805 |
rankPEP <- function(PVs, ESs) |
1769 | 1806 |
{ |
... | ... |
@@ -1785,8 +1822,23 @@ rankPEPsByCols <- function(peps, rankingset="all") |
1785 | 1822 |
return(x) |
1786 | 1823 |
} |
1787 | 1824 |
|
1825 |
+rankPEPsByCols.NES <- function(peps) |
|
1826 |
+{ |
|
1827 |
+ ESs <- t(scale(t(peps$ES))) |
|
1828 |
+ attr(ESs, "scaled:center") <- NULL |
|
1829 |
+ attr(ESs, "scaled:scale") <- NULL |
|
1830 |
+ x <- apply(-ESs, 2, rank, ties.method = "random", na.last="keep") |
|
1831 |
+ return(x) |
|
1832 |
+} |
|
1833 |
+ |
|
1834 |
+rankPEPsByCols.ES <- function(peps) |
|
1835 |
+{ |
|
1836 |
+ x <- apply(-peps$ES, 2, rank, ties.method = "random", na.last="keep") |
|
1837 |
+ return(x) |
|
1838 |
+} |
|
1839 |
+ |
|
1788 | 1840 |
|
1789 |
-rankPEPsByRows <- function(peps) |
|
1841 |
+rankPEPsByRows.ES <- function(peps) |
|
1790 | 1842 |
{ |
1791 | 1843 |
ESs <- peps[["ES"]] |
1792 | 1844 |
x <- t(apply(-ESs, 1, rank, ties.method = "random", na.last="keep")) |
... | ... |
@@ -1865,7 +1917,7 @@ checkGEPsFormat <- function(geps) |
1865 | 1917 |
|
1866 | 1918 |
if(any(mins != 1) || any(maxs != dims[1]) || any(not_unique)) |
1867 | 1919 |
say(paste("GEP columns must be ranks. Check", |
1868 |
- "that each column is made of numbers from 1", |
|
1920 |
+ "that each column is made of integer numbers from 1", |
|
1869 | 1921 |
"to the number of rows."), "error") |
1870 | 1922 |
|
1871 | 1923 |
} |
... | ... |
@@ -1952,13 +2004,10 @@ convertFromGSetClass <- function(gsets) { |
1952 | 2004 |
.makeCollectionIDs <- function(sets) { |
1953 | 2005 |
dbs <- sapply(sets, get, x="category") |
1954 | 2006 |
subdbs <- sapply(sets, get, x="subcategory") |
1955 |
- subdbs[subdbs==""] <- dbs[subdbs==""] |
|
2007 |
+ w <- which(subdbs=="" | is.na(subdbs)) |
|
2008 |
+ if(length(w)>0) |
|
2009 |
+ subdbs[w] <- dbs[w] |
|
1956 | 2010 |
db_ids <- paste(dbs, subdbs, sep="_") |
1957 | 2011 |
return(db_ids) |
1958 | 2012 |
} |
1959 | 2013 |
|
1960 |
-.extractWorkingPEPs <- function(rp, coll, fgset, bgset) { |
|
1961 |
- ishdf5 <- "#rhdf5" %in% rp$tags(coll) |
|
1962 |
- |
|
1963 |
- |
|
1964 |
-} |
... | ... |
@@ -5,7 +5,8 @@ |
5 | 5 |
\title{Build PEPs from GEPs and stores them in the repository.} |
6 | 6 |
\usage{ |
7 | 7 |
buildPEPs(rp, geps, parallel = FALSE, collections = "all", |
8 |
- replace_existing = FALSE, progress_bar = TRUE) |
|
8 |
+ replace_existing = FALSE, progress_bar = TRUE, rawmode_id = NULL, |
|
9 |
+ rawmode_outdir = file.path(rp$root(), "raw")) |
|
9 | 10 |
} |
10 | 11 |
\arguments{ |
11 | 12 |
\item{rp}{A repository created by \code{\link{createRepository}}.} |
... | ... |
@@ -32,6 +33,15 @@ added. Either ways, will throw a warning.} |
32 | 33 |
|
33 | 34 |
\item{progress_bar}{If set to TRUE (default) will show a progress |
34 | 35 |
bar updated after coversion of each column of \code{geps}.} |
36 |
+ |
|
37 |
+\item{rawmode_id}{An integer to be appended to files produced in |
|
38 |
+raw mode (see details). If set to NULL (default), raw mode is |
|
39 |
+turned off.} |
|
40 |
+ |
|
41 |
+\item{rawmode_outdir}{A charater vector specifying the destination |
|
42 |
+path for files produced in raw mode (by the fault it is |
|
43 |
+ROOT/raw, where ROOT is the root of the repository). Ignored if |
|
44 |
+\code{rawmode_id} is NULL.} |
|
35 | 45 |
} |
36 | 46 |
\value{ |
37 | 47 |
Nothing. The computed PEPs will be available in the |
... | ... |
@@ -42,6 +52,18 @@ Given a matrix of ranked lists of genes (GEPs) and a \code{gep2pep} |
42 | 52 |
repository, converts GEPs to PEPs and stores the latter in the |
43 | 53 |
repository. |
44 | 54 |
} |
55 |
+\details{ |
|
56 |
+By deault, output is written to the repository as new |
|
57 |
+ items named using the collection name. However, it is possible |
|
58 |
+ to avoid the repository and write the output to regular files |
|
59 |
+ turning 'raw mode' on through the \code{rawmode_id} and |
|
60 |
+ \code{rawmode_outdir} parameters. This is particuarly useful |
|
61 |
+ when dealing with very large corpora of GEPs, and conversions |
|
62 |
+ are split into independent jobs submitted to a scheduler. At |
|
63 |
+ the end, the data will need to be reconstructed and put into |
|
64 |
+ the repository using \code{importFromRawMode} in order to |
|
65 |
+ perform \code{CondSEA} or \code{PathSEA} analysis. |
|
66 |
+} |
|
45 | 67 |
\examples{ |
46 | 68 |
db <- loadSamplePWS() |
47 | 69 |
db <- as.CategorizedCollection(db) |
... | ... |
@@ -1,11 +1,12 @@ |
1 | 1 |
|
2 | 2 |
|
3 |
-## Workflow: |
|
3 |
+## ## Workflow: |
|
4 | 4 |
## library(GSEABase) |
5 | 5 |
## library(devtools) |
6 | 6 |
## library(testthat) |
7 | 7 |
## load_all() |
8 | 8 |
|
9 |
+loadPEPs <- gep2pep::.loadPEPs |
|
9 | 10 |
|
10 | 11 |
dbfolder <- file.path(tempdir(), "gep2pepDB") |
11 | 12 |
|
... | ... |
@@ -77,7 +78,8 @@ test_that("new db creation", { |
77 | 78 |
|
78 | 79 |
context("creation of peps") |
79 | 80 |
|
80 |
-suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE)) |
|
81 |
+suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE, |
|
82 |
+ min_size=3)) |
|
81 | 83 |
|
82 | 84 |
test_that("build first PEPs", { |
83 | 85 |
expect_equal(length(rp$entries()), 8) |
... | ... |
@@ -95,7 +97,7 @@ test_that("build first PEPs", { |
95 | 97 |
expect_equal(ncol(rp$get(expected_dbs[1])[[2]]), ncol(testgep)) |
96 | 98 |
expect_equal(ncol(rp$get(expected_dbs[3])[[1]]), ncol(testgep)) |
97 | 99 |
expect_equal(ncol(rp$get(expected_dbs[3])[[2]]), ncol(testgep)) |
98 |
- expect_failure(expect_warning(suppressMessages(checkRepository(rp)))) |
|
100 |
+ expect_failure(expect_warning(suppressMessages(checkRepository(rp)))) |
|
99 | 101 |
expect_error(loadESmatrix(rp, "random name")) |
100 | 102 |
expect_equal(loadESmatrix(rp, "c3_TFT"), rp$get("c3_TFT")$ES) |
101 | 103 |
expect_error(loadPVmatrix(rp, "random name")) |
... | ... |
@@ -103,60 +105,6 @@ test_that("build first PEPs", { |
103 | 105 |
}) |
104 | 106 |
|
105 | 107 |
|
106 |
-context("creation of RAW peps") |
|
107 |
- |
|
108 |
-suppressMessages( |
|
109 |
- buildPEPs(rp, testgep[,1:2], progress_bar=FALSE, |
|
110 |
- rawmode_id=1) |
|
111 |
-) |
|
112 |
-suppressMessages( |
|
113 |
- buildPEPs(rp, testgep[,3:5], progress_bar=FALSE, |
|
114 |
- rawmode_id=2) |
|
115 |
-) |
|
116 |
- |
|
117 |
-outfiles1 <- paste0(getCollections(rp), "#1.RDS") |
|
118 |
-outfiles2 <- paste0(getCollections(rp), "#2.RDS") |
|
119 |
-outfiles <- c(outfiles1, outfiles2) |
|
120 |
-outdir <- file.path(rp$root(), "raw") |
|
121 |
-f1 <- readRDS(paste0(file.path(outdir, outfiles[1]))) |
|
122 |
- |
|
123 |
-test_that("build hdf5 PEPs", { |
|
124 |
- expect_true(all(sapply(outfiles, `%in%`, list.files(outdir)))) |
|
125 |
- expect_equal(f1$ES[,1], rp$get("c3_TFT")$ES[,1]) |
|
126 |
- expect_equal(f1$PV[,1], rp$get("c3_TFT")$PV[,1]) |
|
127 |
- expect_equal(f1$ES[,2], rp$get("c3_TFT")$ES[,2]) |
|
128 |
- expect_equal(f1$PV[,2], rp$get("c3_TFT")$PV[,2]) |
|
129 |
-}) |
|
130 |
- |
|
131 |
- |
|
132 |
-colls <- getCollections(rp) |
|
133 |
- |
|
134 |
-oldpep2 <- rp$get(colls[2]) |
|
135 |
-rp$rm(tags="pep", force=T) |
|
136 |
-importFromRawMode(rp) |
|
137 |
- |
|
138 |
-pep2 <- gep2pep::.loadPEPs(rp, colls[2]) |
|
139 |
- |
|
140 |
-rownames(pep2$ES) <- rownames(pep2$PV) <- rp$get(colls[2]) |
|
141 |
-colnames(pep2$ES) <- colnames(pep2$PV) <- rp$get(colls[2]) |
|
142 |
- |
|
143 |
-## rownames(pep2$ES) |
|
144 |
-## [1] "M7785" "M6394" "M18759" "M10635" "M14709" "M4820" "M7677" "M11751" |
|
145 |
-## [9] "M10105" "M5012" |
|
146 |
-## rownames(oldpep2$ES) |
|
147 |
-## M7785 M6394 M18759 M10635 M14709 M4820 M7677 M11751 |
|
148 |
-## "M7785" "M6394" "M18759" "M10635" "M14709" "M4820" "M7677" "M11751" |
|
149 |
-## M10105 M5012 |
|
150 |
-## "M10105" "M5012" |
|
151 |
- |
|
152 |
-test_that("check hdf5 PEPss", { |
|
153 |
- expect_true(all(oldpep2$ES==pep2$ES)) |
|
154 |
- expect_true(all(oldpep2$PV==pep2$PV)) |
|
155 |
- expect_true(all(rownames(oldpep2$ES) == rownames(pep2$ES))) |
|
156 |
- expect_true(all(rownames(oldpep2$PV) == rownames(pep2$PV))) |
|
157 |
- expect_true(all(colnames(oldpep2$ES) == colnames(pep2$ES))) |
|
158 |
- expect_true(all(colnames(oldpep2$PV) == colnames(pep2$PV))) |
|
159 |
-}) |
|
160 | 108 |
|
161 | 109 |
res <- list() |
162 | 110 |
for(i in 1:3) { |
... | ... |
@@ -166,29 +114,30 @@ for(i in 1:3) { |
166 | 114 |
id <- testpws_old[[testi]]$id |
167 | 115 |
tomatch <- intersect(rownames(testgep), set) |
168 | 116 |
inset <- testgep[match(tomatch, rownames(testgep)), testj] |
169 |
- ks <- ks.test.2(inset, (1:nrow(testgep))[-inset], maxCombSize=10^10) |
|
117 |
+ if(length(tomatch) >= 3) { |
|
118 |
+ ks <- ks.test.2(inset, (1:nrow(testgep))[-inset], maxCombSize=10^10) |
|
119 |
+ } else ks <- list(ES=as.numeric(NA), p.value=as.numeric(NA)) |
|
170 | 120 |
dbi <- dbs[testi] |
171 | 121 |
res[[i]] <- list(id=id, testj=testj, ks=ks, dbi=dbi) |
172 | 122 |
} |
173 |
- |
|
174 | 123 |
test_that("KS statistics", { |
175 | 124 |
i <- 1 |
176 | 125 |
id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi |
177 |
- expect_equal(rp$get(dbi)$ES[id, testj], ks$ES) |
|
178 |
- expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value) |
|
126 |
+ expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES) |
|
127 |
+ expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value) |
|
179 | 128 |
i <- 2 |
180 | 129 |
id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi |
181 |
- expect_equal(rp$get(dbi)$ES[id, testj], ks$ES) |
|
182 |
- expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value) |
|
130 |
+ expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES) |
|
131 |
+ expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value) |
|
183 | 132 |
i <- 3 |
184 | 133 |
id <- res[[i]]$id; testj <- res[[i]]$testj; ks <- res[[i]]$ks; dbi <- res[[i]]$dbi |
185 |
- expect_equal(rp$get(dbi)$ES[id, testj], ks$ES) |
|
186 |
- expect_equal(rp$get(dbi)$PV[id, testj], ks$p.value) |
|
134 |
+ expect_equal(loadPEPs(rp, dbi)$ES[id, testj], ks$ES) |
|
135 |
+ expect_equal(loadPEPs(rp, dbi)$PV[id, testj], ks$p.value) |
|
187 | 136 |
}) |
188 | 137 |
|
189 | 138 |
context("adding existing peps") |
190 | 139 |
|
191 |
-oldTFT <- rp$get("c3_TFT") |
|
140 |
+oldTFT <- loadPEPs(rp, "c3_TFT") |
|
192 | 141 |
test_that("Adding PEPs", { |
193 | 142 |
expect_warning( |
194 | 143 |
suppressMessages(buildPEPs(rp, testgep[, 1:3], progress_bar=FALSE)) |
... | ... |
@@ -196,7 +145,7 @@ test_that("Adding PEPs", { |
196 | 145 |
expect_failure(expect_warning(suppressMessages(checkRepository(rp)))) |
197 | 146 |
}) |
198 | 147 |
|
199 |
-untouchedTFT <- rp$get("c3_TFT") |
|
148 |
+untouchedTFT <- oldTFT |
|
200 | 149 |
|
201 | 150 |
subs <- c(2,4,5) |
202 | 151 |
smallTFT <- list(ES=oldTFT$ES[, subs], |
... | ... |
@@ -205,7 +154,7 @@ smallTFT <- list(ES=oldTFT$ES[, subs], |
205 | 154 |
## the "perturbagens" item |
206 | 155 |
rp$set("c3_TFT", smallTFT) |
207 | 156 |
|
208 |
-rebuiltTFT <- rp$get("c3_TFT") |
|
157 |
+rebuiltTFT <- loadPEPs(rp, "c3_TFT") |
|
209 | 158 |
test_that("Adding PEPs", { |
210 | 159 |
expect_warning( |
211 | 160 |
suppressMessages(buildPEPs(rp, testgep[, 1:3], progress_bar=FALSE)) |
... | ... |
@@ -216,7 +165,6 @@ test_that("Adding PEPs", { |
216 | 165 |
expect_warning(suppressMessages(checkRepository(rp))) |
217 | 166 |
}) |
218 | 167 |
|
219 |
- |
|
220 | 168 |
rp$set("c3_TFT", oldTFT) |
221 | 169 |
|
222 | 170 |
|
... | ... |
@@ -229,8 +177,8 @@ for(i in 1:ncol(testgep)) |
229 | 177 |
) |
230 | 178 |
|
231 | 179 |
test_that("adding one by one", { |
232 |
- expect_true(identical(rp2$get("c3_TFT"), rp$get("c3_TFT"))) |
|
233 |
- expect_failure(expect_warning(suppressMessages(checkRepository(rp2)))) |
|
180 |
+ ##expect_true(identical(rp2$get("c3_TFT"), rp$get("c3_TFT"))) |
|
181 |
+ expect_failure(expect_warning(suppressMessages(checkRepository(rp2)))) |
|
234 | 182 |
}) |
235 | 183 |
|
236 | 184 |
|
... | ... |
@@ -242,20 +190,22 @@ peps3 <- rp$get(expected_dbs[3]) |
242 | 190 |
|
243 | 191 |
es1 <- peps1$ES |
244 | 192 |
es3 <- peps3$ES |
245 |
-RowRanked1 <- rankPEPsByRows(peps1) |
|
246 |
-RowRanked3 <- rankPEPsByRows(peps3) |
|
193 |
+RowRanked1 <- rankPEPsByRows.ES(peps1) |
|
194 |
+nas1 <- which(is.na(RowRanked1[,1])) |
|
195 |
+RowRanked3 <- rankPEPsByRows.ES(peps3) |
|
196 |
+nas3 <- which(is.na(RowRanked3[,3])) |
|
247 | 197 |
|
248 | 198 |
test_that("Row ranking", { |
249 |
- expect_true(all(apply(RowRanked1, 1, setequal, 1:5))) |
|
250 |
- expect_true(all(apply(RowRanked3, 1, setequal, 1:5))) |
|
199 |
+ expect_true(all(apply(RowRanked1[-nas1,], 1, setequal, 1:5))) |
|
200 |
+ expect_true(all(apply(RowRanked3[-nas3,], 1, setequal, 1:5))) |
|
251 | 201 |
expect_equal(RowRanked1[1,1], 5) |
252 | 202 |
expect_equal(RowRanked1[4,3], 1) |
253 | 203 |
expect_equal(RowRanked3[5,4], 5) |
254 | 204 |
expect_equal(RowRanked3[8,3], 1) |
255 | 205 |
}) |
256 | 206 |
|
257 |
-ColRanked1 <- rankPEPsByCols(peps1) |
|
258 |
-ColRanked3 <- rankPEPsByCols(peps3) |
|
207 |
+ColRanked1 <- rankPEPsByCols.SPV(peps1) |
|
208 |
+ColRanked3 <- rankPEPsByCols.SPV(peps3) |
|
259 | 209 |
|
260 | 210 |
randj <- sample(ncol(ColRanked3),1) |
261 | 211 |
PVs <- peps1$PV[,randj] |
... | ... |
@@ -265,30 +215,44 @@ if(any(ESs<0)) { |
265 | 215 |
lastid <- which.min(PVs) |
266 | 216 |
} else lastid <- which.max(PVs) |
267 | 217 |
|
268 |
-test_that("Column ranking", { |
|
269 |
- expect_true(all(apply(ColRanked1, 2, setequal, 1:10))) |
|
270 |
- expect_true(all(apply(ColRanked3, 2, setequal, 1:10))) |
|
218 |
+test_that("Column ranking with SPV", { |
|
219 |
+ expect_true(all(apply(ColRanked1[-nas1,], 2, setequal, 1:9))) |
|
220 |
+ expect_true(all(apply(ColRanked3[-nas3,], 2, setequal, 1:9))) |
|
271 | 221 |
expect_equal(ColRanked1[2,1], 1) |
272 |
- expect_equal(ColRanked1[1,1], 10) |
|
222 |
+ expect_equal(ColRanked1[1,1], 9) |
|
273 | 223 |
expect_equal(ColRanked3[8,3], 1) |
274 |
- expect_equal(ColRanked3[10,3], 10) |
|
275 |
- expect_equal(ColRanked3[4,5], 10) |
|
276 |
- expect_equal(ColRanked1[lastid, randj], 10) |
|
224 |
+ expect_equal(ColRanked3[10,3], 9) |
|
225 |
+ expect_equal(ColRanked3[4,5], 9) |
|
226 |
+ expect_equal(ColRanked1[lastid, randj], 9) |
|
277 | 227 |
}) |
278 | 228 |
|
279 | 229 |
|
280 |
-context("CondSEA") |
|
230 |
+ColRanked3 <- rankPEPsByCols.NES(peps3) |
|
231 |
+manual <- t(scale(t(peps3$ES))) |
|
232 |
+stopifnot(all(apply(manual[-nas3,],1,sd)-1<10^-15)) |
|
233 |
+manualR <- apply(-manual, 2, rank, na.last="keep") |
|
281 | 234 |
|
235 |
+test_that("Column ranking with NES", { |
|
236 |
+ expect_true(all(ColRanked3[-nas3,]==manualR[-nas3,])) |
|
237 |
+ expect_true(all(is.na(manualR[nas3,]))) |
|
238 |
+}) |
|
239 |
+ |
|
240 |
+ |
|
241 |
+context("CondSEA") |
|
282 | 242 |
pgset <- c("(+)_chelidonine", "(+/_)_catechin") |
283 | 243 |
res <- suppressMessages(CondSEA(rp, pgset)) |
284 | 244 |
randi <- sample(1:length(testpws), 1) |
285 | 245 |
pwsid <- testpws_old[[randi]]$id |
286 | 246 |
randDB <- dbs[randi] |
287 |
-ranked <- rankPEPsByRows(rp$get(randDB)) |
|
247 |
+ranked <- rankPEPsByRows.ES(rp$get(randDB)) |
|
288 | 248 |
inset <- ranked[pwsid, pgset] |
289 | 249 |
outset <- ranked[pwsid, setdiff(colnames(ranked), pgset)] |
290 |
-ks <- ks.test.2(inset, outset) |
|
291 |
- |
|
250 |
+if(length(inset[!is.na(inset)])>0 && |
|
251 |
+ length(outset[!is.na(outset)])>0) { |
|
252 |
+ ks <- ks.test.2(inset, outset, maxCombSize=10^10) |
|
253 |
+} else { |
|
254 |
+ ks <- list(ES=as.numeric(NA), p.value=as.numeric(NA)) |
|
255 |
+} |
|
292 | 256 |
test_that("CondSEA", { |
293 | 257 |
expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]]) |
294 | 258 |
expect_equal(getResults(res, "c3_TFT"), res$CondSEA[["c3_TFT"]]) |
... | ... |
@@ -303,33 +267,42 @@ context("PathSEA") |
303 | 267 |
|
304 | 268 |
db1 <- expected_dbs[1] |
305 | 269 |
db3 <- expected_dbs[3] |
306 |
- |
|
307 | 270 |
pws1 <- sapply(testpws[makeCollectionIDs(testpws)==db1][c(2,5,6,9)], setName) |
308 | 271 |
pws3 <- sapply(testpws[makeCollectionIDs(testpws)==db3][c(1,3,10)], setName) |
309 | 272 |
res <- suppressMessages(PathSEA(rp, testpws[c(pws1, pws3)])) |
310 | 273 |
setids1 <- sapply(testpws[pws1], setIdentifier) |
311 | 274 |
setids3 <- sapply(testpws[pws3], setIdentifier) |
312 |
- |
|
313 | 275 |
randj1 <- sample(1:ncol(testgep), 1) |
314 |
-ranked <- rankPEPsByCols(rp$get(db1)) |
|
276 |
+ranked <- rankPEPsByCols.SPV(rp$get(db1)) |
|
315 | 277 |
peps <- rp$get(db1) |
316 | 278 |
inset <- ranked[setids1, randj1] |
317 | 279 |
outset <- ranked[setdiff(rownames(ranked), setids1), randj1] |
318 |
-ks1 <- ks.test.2(inset, outset) |
|
319 |
- |
|
280 |
+inset <- inset[!is.na(inset)] |
|
281 |
+outset <- outset[!is.na(outset)] |
|
282 |
+if(length(inset)>0 && |
|
283 |
+ length(outset)>0) { |
|
284 |
+ ks1 <- ks.test.2(inset, outset, maxCombSize=10^10) |
|
285 |
+} else { |
|
286 |
+ ks1 <- list(ES=as.numeric(NA), p.value=as.numeric(NA)) |
|
287 |
+} |
|
320 | 288 |
randj3 <- sample(1:ncol(testgep), 1) |
321 |
-ranked <- rankPEPsByCols(rp$get(db3)) |
|
289 |
+ranked <- rankPEPsByCols.SPV(rp$get(db3)) |
|
322 | 290 |
peps <- rp$get(db3) |
323 | 291 |
inset <- ranked[setids3, randj3] |
324 | 292 |
outset <- ranked[setdiff(rownames(ranked), setids3), randj3] |
325 |
-ks3 <- ks.test.2(inset, outset) |
|
326 |
- |
|
293 |
+inset <- inset[!is.na(inset)] |
|
294 |
+outset <- outset[!is.na(outset)] |
|
295 |
+if(length(inset)>0 && |
|
296 |
+ length(outset)>0) { |
|
297 |
+ ks3 <- ks.test.2(inset, outset, maxCombSize=10^10) |
|
298 |
+} else { |
|
299 |
+ ks3 <- list(ES=as.numeric(NA), p.value=as.numeric(NA)) |
|
300 |
+} |
|
327 | 301 |
name1 <- colnames(testgep)[randj1] |
328 | 302 |
name3 <- colnames(testgep)[randj3] |
329 |
- |
|
330 | 303 |
test_that("PathSEA", { |
331 |
- expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]]) |
|
332 |
- expect_equal(getResults(res, "c3_TFT"), res$PathSEA[["c3_TFT"]]) |
|
304 |
+ expect_equal(getDetails(res, "c3_TFT"), res$details[["c3_TFT"]]) |
|
305 |
+ expect_equal(getResults(res, "c3_TFT"), res$PathSEA[["c3_TFT"]]) |
|
333 | 306 |
expect_equal(unname(res[["PathSEA"]][[db1]][name1, "ES"]), |
334 | 307 |
ks1$ES) |
335 | 308 |
expect_equal(unname(res[["PathSEA"]][[db1]][name1, "PV"]), |
... | ... |
@@ -341,7 +314,6 @@ test_that("PathSEA", { |
341 | 314 |
}) |
342 | 315 |
|
343 | 316 |
|
344 |
- |
|
345 | 317 |
## A gene that is found in at least 3 pathways: |
346 | 318 |
gene <- intersect(intersect(geneIds(testpws[[3]]), geneIds(testpws[[4]])), |
347 | 319 |
geneIds(testpws[[7]]))[1] |
348 | 320 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,86 @@ |
1 |
+ |
|
2 |
+ |
|
3 |
+## ## Workflow: |
|
4 |
+## library(GSEABase) |
|
5 |
+## library(devtools) |
|
6 |
+## library(testthat) |
|
7 |
+## load_all() |
|
8 |
+ |
|
9 |
+loadPEPs <- gep2pep::.loadPEPs |
|
10 |
+ |
|
11 |
+dbfolder <- file.path(tempdir(), "gep2pepDB") |
|
12 |
+ |
|
13 |
+clear_test_repo <- function(suffix=NULL) { |
|
14 |
+ folder <- paste0(dbfolder, suffix) |
|
15 |
+ if(file.exists(folder)) |
|
16 |
+ unlink(folder, T) |
|
17 |
+} |
|
18 |
+ |
|
19 |
+create_test_repo <- function(suffix=NULL) { |
|
20 |
+ folder <- paste0(dbfolder, suffix) |
|
21 |
+ clear_test_repo(suffix) |
|
22 |
+ return( |
|
23 |
+ suppressMessages( |
|
24 |
+ createRepository(folder, testpws) |
|
25 |
+ ) |
|
26 |
+ ) |
|
27 |
+} |
|
28 |
+ |
|
29 |
+testgep <- loadSampleGEP() |
|
30 |
+testpws <- as.CategorizedCollection( |
|
31 |
+ loadSamplePWS() |
|
32 |
+) |
|
33 |
+testpws_old <- gep2pep:::convertFromGSetClass(testpws) |
|
34 |
+ |
|
35 |
+rp <- create_test_repo() |
|
36 |
+dbs <- makeCollectionIDs(testpws) |
|
37 |
+expected_dbs <- c("c3_TFT", "c3_MIR", "c4_CGN") |
|
38 |
+ |
|
39 |
+ |
|
40 |
+suppressMessages(buildPEPs(rp, testgep, progress_bar=FALSE)) |
|
41 |
+ |
|
42 |
+context("creation of RAW peps") |
|
43 |
+ |
|
44 |
+suppressMessages( |
|
45 |
+ buildPEPs(rp, testgep[,1:2], progress_bar=FALSE, |
|
46 |
+ rawmode_id=1) |
|
47 |
+) |
|
48 |
+suppressMessages( |
|
49 |
+ buildPEPs(rp, testgep[,3:5], progress_bar=FALSE, |
|
50 |
+ rawmode_id=2) |
|
51 |
+) |
|
52 |
+ |
|
53 |
+outfiles1 <- paste0(getCollections(rp), "#1.RDS") |
|
54 |
+outfiles2 <- paste0(getCollections(rp), "#2.RDS") |
|
55 |
+outfiles <- c(outfiles1, outfiles2) |
|
56 |
+outdir <- file.path(rp$root(), "raw") |
|
57 |
+f1 <- readRDS(paste0(file.path(outdir, outfiles[1]))) |
|
58 |
+ |
|
59 |
+test_that("build hdf5 PEPs", { |
|
60 |
+ expect_true(all(sapply(outfiles, `%in%`, list.files(outdir)))) |
|
61 |
+ expect_equal(f1$ES[,1], rp$get("c3_TFT")$ES[,1]) |
|
62 |
+ expect_equal(f1$PV[,1], rp$get("c3_TFT")$PV[,1]) |
|
63 |
+ expect_equal(f1$ES[,2], rp$get("c3_TFT")$ES[,2]) |
|
64 |
+ expect_equal(f1$PV[,2], rp$get("c3_TFT")$PV[,2]) |
|
65 |
+}) |
|
66 |
+ |
|
67 |
+ |
|
68 |
+colls <- getCollections(rp) |
|
69 |
+ |
|
70 |
+oldpep2 <- rp$get(colls[2]) |
|
71 |
+rp$rm(tags="pep", force=T) |
|
72 |
+importFromRawMode(rp) |
|
73 |
+ |
|
74 |
+pep2 <- loadPEPs(rp, colls[2]) |
|
75 |
+w <- is.na(pep2$ES[,1]) |
|
76 |
+ |
|
77 |
+test_that("check hdf5 PEPss", { |
|
78 |
+ expect_true(all(oldpep2$ES[!w,]==pep2$ES[!w,])) |
|
79 |
+ expect_true(all(oldpep2$PV[!w,]==pep2$PV[!w,])) |
|
80 |
+ expect_true(all(is.na(oldpep2$PV[w,]))) |
|
81 |
+ expect_true(all(rownames(oldpep2$ES)==rownames(pep2$ES))) |
|
82 |
+ expect_true(all(rownames(oldpep2$PV)==rownames(pep2$PV))) |
|
83 |
+ expect_true(all(colnames(oldpep2$ES)==colnames(pep2$ES))) |
|
84 |
+ expect_true(all(colnames(oldpep2$PV)==colnames(pep2$PV))) |
|
85 |
+}) |
|
86 |
+ |