Browse code

add logging to testing functions

noriakis authored on 13/05/2022 09:39:33
Showing 4 changed files

... ...
@@ -2,8 +2,10 @@
2 2
 
3 3
 export(bngeneplot)
4 4
 export(bngeneplotCustom)
5
+export(bngenetest)
5 6
 export(bnpathplot)
6 7
 export(bnpathplotCustom)
8
+export(bnpathtest)
7 9
 export(compareBNs)
8 10
 export(obtainPath)
9 11
 export(queryCpDistLs)
... ...
@@ -384,3 +384,217 @@ compareBNs <- function(listOfNets){
384 384
   }
385 385
   return(fms)
386 386
 }
387
+
388
+#' bnpathtest
389
+#'
390
+#' Testing various R for bayesian network between pathways
391
+#'
392
+#' @param results the enrichment analysis result
393
+#' @param exp gene expression matrix
394
+#' @param expRow the type of the identifier of rows of expression matrix
395
+#' @param expSample candidate rows to be included in the inference
396
+#'                  default to all
397
+#' @param algo structure learning method used in boot.strength()
398
+#'             default to "hc"
399
+#' @param algorithm.args parameters to pass to
400
+#'                       bnlearn structure learnng function
401
+#' @param cl cluster object from parallel::makeCluster()
402
+#' @param qvalueCutOff the cutoff value for qvalue
403
+#' @param adjpCutOff the cutoff value for adjusted pvalues
404
+#' @param nCategory the number of pathways to be included
405
+#' @param Rrange the sequence of R values to be tested
406
+#' @param scoreType return the specified scores
407
+#' @param orgDb perform clusterProfiler::setReadable
408
+#'              based on this organism database
409
+#' @return list of graphs and scores
410
+#' @examples
411
+#' data("exampleEaRes");data("exampleGeneExp")
412
+#' res <- bnpathtest(results = exampleEaRes, exp = exampleGeneExp,
413
+#'        algo="hc", Rrange=seq(10, 30, 10), expRow = "ENSEMBL",
414
+#'        scoreType="bge")
415
+#' @export
416
+#'
417
+bnpathtest <- function (results, exp, expSample=NULL, algo="hc",
418
+                        algorithm.args=NULL, expRow="ENSEMBL", cl=NULL,
419
+                        orgDb=org.Hs.eg.db,
420
+                        qvalueCutOff=0.05, adjpCutOff=0.05, nCategory=15,
421
+                        Rrange=seq(2,40,2), scoreType="aic-g")
422
+{
423
+    if (!is.null(orgDb)){
424
+        results <- clusterProfiler::setReadable(results, OrgDb=orgDb)
425
+    }
426
+    if (is.null(expSample)) {expSample=colnames(exp)}
427
+    # if (results@keytype == "kegg"){
428
+    #     resultsGeneType <- "ENTREZID"
429
+    # } else {
430
+    #     resultsGeneType <- results@keytype
431
+    # }
432
+
433
+    res <- results@result
434
+    if (!is.null(qvalueCutOff)) { res <- subset(res, qvalue < qvalueCutOff) }
435
+    if (!is.null(adjpCutOff)) { res <- subset(res, p.adjust < adjpCutOff) }
436
+    if (nCategory) {
437
+        res <- res[seq_len(nCategory),]
438
+        res <- res[!is.na(res$ID),]
439
+    }
440
+
441
+
442
+    pcs <- c()
443
+    pwayNames <- c()
444
+    for (i in seq_len(length(rownames(res)))) {
445
+        genesInPathway <- strsplit(res[i, ]$geneID, "/")[[1]]
446
+        if (!is.null(orgDb)){
447
+            genesInPathway <- clusterProfiler::bitr(genesInPathway,
448
+                                                    fromType="SYMBOL",
449
+                                                    toType=expRow,
450
+                                                    OrgDb=orgDb)[expRow][,1]
451
+        }
452
+        pathwayMatrix <- exp[ intersect(rownames(exp), genesInPathway),
453
+                            expSample ]
454
+        if (dim(pathwayMatrix)[1]==0) {
455
+            message("no gene in the pathway present in expression data")
456
+        } else {
457
+            pathwayMatrixPca <- prcomp(t(pathwayMatrix), scale. = FALSE)$x[,1]
458
+            avExp <- apply(pathwayMatrix, 2, mean)
459
+            corFlag <- cor(pathwayMatrixPca, avExp)
460
+            if (corFlag < 0){pathwayMatrixPca <- pathwayMatrixPca*-1}
461
+            # pathwayMatrixSum <- apply(pathwayMatrix, 2, sum)
462
+            pwayNames <- c(pwayNames, res[i,]$Description)
463
+            pcs <- cbind(pcs, pathwayMatrixPca)
464
+        }
465
+    }
466
+    colnames(pcs) <- pwayNames
467
+    pcs <- data.frame(pcs, check.names=FALSE)
468
+
469
+    strList <- list()
470
+    graphList <- list()
471
+    scoreList <- list()
472
+
473
+    # strengthBf <- bf.strength(hc(pcs), pcs)
474
+    # averageBf <- averaged.network(strengthBf)
475
+    # averageBf <- cextend(averageBf, strict=FALSE)
476
+
477
+    for (r in Rrange){
478
+        cat(paste("performing R:", r, "\n"))
479
+        strength <- boot.strength(pcs, algorithm=algo, R=r, cluster=cl, algorithm.args=NULL)
480
+        strList[[paste0("R",r)]] <- strength
481
+        av <- averaged.network(strength)
482
+        # Infer the edge direction by default
483
+        # av <- chooseEdgeDir(av, pcs, scoreType)
484
+        av <- cextend(av, strict=FALSE)
485
+
486
+        if (is.dag(bnlearn::as.igraph(av))){
487
+            graphList[[paste0("R",r)]] <- av
488
+            scoreList[[paste0("R",r)]] <- score(av, pcs, scoreType)
489
+        } else {
490
+            graphList[[paste0("R",r)]] <- av
491
+            scoreList[[paste0("R",r)]] <- NA
492
+        }
493
+    }
494
+
495
+    return(list("df"=pcs, "str"=strList, "graph"=graphList, "score"=scoreList))
496
+}
497
+
498
+
499
+#' bngenetest
500
+#'
501
+#' Testing various R for bayesian network between genes
502
+#'
503
+#' @param results the enrichment analysis result
504
+#' @param exp gene expression matrix
505
+#' @param expRow the type of the identifier of rows of expression matrix
506
+#' @param expSample candidate rows to be included in the inference
507
+#'                  default to all
508
+#' @param algo structure learning method used in boot.strength()
509
+#'             default to "hc"
510
+#' @param algorithm.args parameters to pass to
511
+#'                       bnlearn structure learnng function
512
+#' @param cl cluster object from parallel::makeCluster()
513
+#' @param Rrange the sequence of R values to be tested
514
+#' @param scoreType return the specified scores
515
+#' @param convertSymbol whether the label of resulting network is
516
+#'                      converted to symbol, default to TRUE
517
+#' @param pathNum the pathway number (the number of row of the original result,
518
+#'                                    ordered by p-value)
519
+#' @param orgDb perform clusterProfiler::setReadable
520
+#'              based on this organism database
521
+#'
522
+#' @return list of graphs and scores
523
+#' @examples
524
+#' data("exampleEaRes");data("exampleGeneExp")
525
+#' res <- bngenetest(results = exampleEaRes, exp = exampleGeneExp,
526
+#' algo="hc", Rrange=seq(10, 30, 10), pathNum=1, scoreType="bge")
527
+#' @export
528
+#'
529
+bngenetest <- function (results, exp, expSample=NULL, algo="hc",
530
+                        Rrange=seq(2,40,2), cl=NULL, algorithm.args=NULL,
531
+                        pathNum=NULL, convertSymbol=TRUE, expRow="ENSEMBL",
532
+                        scoreType="aic-g", orgDb=org.Hs.eg.db)
533
+{
534
+    # if (results@keytype == "kegg"){
535
+    #     resultsGeneType <- "ENTREZID"
536
+    # } else {
537
+    #     resultsGeneType <- results@keytype
538
+    # }
539
+    if (!is.null(orgDb)){
540
+        results <- setReadable(results, OrgDb=orgDb)
541
+    }
542
+    if (is.null(expSample)) {expSample=colnames(exp)}
543
+    res <- results@result
544
+
545
+    genesInPathway <- unlist(strsplit(res[pathNum, ]$geneID, "/"))
546
+
547
+    if (!is.null(orgDb)){
548
+        genesInPathway <- clusterProfiler::bitr(genesInPathway,
549
+                                                fromType="SYMBOL",
550
+                                                toType=expRow,
551
+                                                OrgDb=orgDb)[expRow][,1]
552
+    }
553
+
554
+    pcs <- exp[ intersect(rownames(exp), genesInPathway), expSample ]
555
+
556
+    if (convertSymbol) {
557
+          matchTable <- clusterProfiler::bitr(rownames(pcs), fromType=expRow,
558
+                                toType="SYMBOL", OrgDb=orgDb)
559
+          if (sum(duplicated(matchTable[,1])) >= 1) {
560
+            message("Removing expRow that matches the multiple symbols")
561
+            matchTable <- matchTable[
562
+            !matchTable[,1] %in% matchTable[,1][duplicated(matchTable[,1])],]
563
+          }
564
+          rnSym <- matchTable["SYMBOL"][,1]
565
+          rnExp <- matchTable[expRow][,1]
566
+          pcs <- pcs[rnExp, ]
567
+          rownames(pcs) <- rnSym
568
+    }
569
+
570
+    pcs <- data.frame(t(pcs))
571
+
572
+    strList <- list()
573
+    graphList <- list()
574
+    scoreList <- list()
575
+
576
+    # strengthBf <- bf.strength(hc(pcs), pcs)
577
+    # averageBf <- averaged.network(strengthBf)
578
+    # averageBf <- cextend(averageBf, strict=FALSE)
579
+
580
+    for (r in Rrange){
581
+        cat(paste("performing R:", r, "\n"))
582
+        strength <- boot.strength(pcs, algorithm=algo,
583
+            algorithm.args=algorithm.args, R=r, cluster=cl)
584
+        strList[[paste0("R",r)]] <- strength
585
+        av <- averaged.network(strength)
586
+        # Infer the edge direction by default
587
+        # av <- chooseEdgeDir(av, pcs, scoreType)
588
+        av <- cextend(av, strict=FALSE)
589
+
590
+        if (is.dag(bnlearn::as.igraph(av))){
591
+            graphList[[paste0("R",r)]] <- av
592
+            scoreList[[paste0("R",r)]] <- score(av, pcs, scoreType)
593
+        } else {
594
+            graphList[[paste0("R",r)]] <- av
595
+            scoreList[[paste0("R",r)]] <- NA
596
+        }
597
+    }
598
+
599
+    return(list("df"=pcs, "str"=strList, "graph"=graphList, "score"=scoreList))
600
+}
387 601
new file mode 100644
... ...
@@ -0,0 +1,63 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/utilities.R
3
+\name{bngenetest}
4
+\alias{bngenetest}
5
+\title{bngenetest}
6
+\usage{
7
+bngenetest(
8
+  results,
9
+  exp,
10
+  expSample = NULL,
11
+  algo = "hc",
12
+  Rrange = seq(2, 40, 2),
13
+  cl = NULL,
14
+  algorithm.args = NULL,
15
+  pathNum = NULL,
16
+  convertSymbol = TRUE,
17
+  expRow = "ENSEMBL",
18
+  scoreType = "aic-g",
19
+  orgDb = org.Hs.eg.db
20
+)
21
+}
22
+\arguments{
23
+\item{results}{the enrichment analysis result}
24
+
25
+\item{exp}{gene expression matrix}
26
+
27
+\item{expSample}{candidate rows to be included in the inference
28
+default to all}
29
+
30
+\item{algo}{structure learning method used in boot.strength()
31
+default to "hc"}
32
+
33
+\item{Rrange}{the sequence of R values to be tested}
34
+
35
+\item{cl}{cluster object from parallel::makeCluster()}
36
+
37
+\item{algorithm.args}{parameters to pass to
38
+bnlearn structure learnng function}
39
+
40
+\item{pathNum}{the pathway number (the number of row of the original result,
41
+ordered by p-value)}
42
+
43
+\item{convertSymbol}{whether the label of resulting network is
44
+converted to symbol, default to TRUE}
45
+
46
+\item{expRow}{the type of the identifier of rows of expression matrix}
47
+
48
+\item{scoreType}{return the specified scores}
49
+
50
+\item{orgDb}{perform clusterProfiler::setReadable
51
+based on this organism database}
52
+}
53
+\value{
54
+list of graphs and scores
55
+}
56
+\description{
57
+Testing various R for bayesian network between genes
58
+}
59
+\examples{
60
+data("exampleEaRes");data("exampleGeneExp")
61
+res <- bngenetest(results = exampleEaRes, exp = exampleGeneExp,
62
+algo="hc", Rrange=seq(10, 30, 10), pathNum=1, scoreType="bge")
63
+}
0 64
new file mode 100644
... ...
@@ -0,0 +1,65 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/utilities.R
3
+\name{bnpathtest}
4
+\alias{bnpathtest}
5
+\title{bnpathtest}
6
+\usage{
7
+bnpathtest(
8
+  results,
9
+  exp,
10
+  expSample = NULL,
11
+  algo = "hc",
12
+  algorithm.args = NULL,
13
+  expRow = "ENSEMBL",
14
+  cl = NULL,
15
+  orgDb = org.Hs.eg.db,
16
+  qvalueCutOff = 0.05,
17
+  adjpCutOff = 0.05,
18
+  nCategory = 15,
19
+  Rrange = seq(2, 40, 2),
20
+  scoreType = "aic-g"
21
+)
22
+}
23
+\arguments{
24
+\item{results}{the enrichment analysis result}
25
+
26
+\item{exp}{gene expression matrix}
27
+
28
+\item{expSample}{candidate rows to be included in the inference
29
+default to all}
30
+
31
+\item{algo}{structure learning method used in boot.strength()
32
+default to "hc"}
33
+
34
+\item{algorithm.args}{parameters to pass to
35
+bnlearn structure learnng function}
36
+
37
+\item{expRow}{the type of the identifier of rows of expression matrix}
38
+
39
+\item{cl}{cluster object from parallel::makeCluster()}
40
+
41
+\item{orgDb}{perform clusterProfiler::setReadable
42
+based on this organism database}
43
+
44
+\item{qvalueCutOff}{the cutoff value for qvalue}
45
+
46
+\item{adjpCutOff}{the cutoff value for adjusted pvalues}
47
+
48
+\item{nCategory}{the number of pathways to be included}
49
+
50
+\item{Rrange}{the sequence of R values to be tested}
51
+
52
+\item{scoreType}{return the specified scores}
53
+}
54
+\value{
55
+list of graphs and scores
56
+}
57
+\description{
58
+Testing various R for bayesian network between pathways
59
+}
60
+\examples{
61
+data("exampleEaRes");data("exampleGeneExp")
62
+res <- bnpathtest(results = exampleEaRes, exp = exampleGeneExp,
63
+       algo="hc", Rrange=seq(10, 30, 10), expRow = "ENSEMBL",
64
+       scoreType="bge")
65
+}