Browse code

selectBestModel

zhewa authored on 12/05/2020 11:14:38
Showing 3 changed files

... ...
@@ -102,6 +102,7 @@ exportMethods(perplexity)
102 102
 exportMethods(resList)
103 103
 exportMethods(runParams)
104 104
 exportMethods(sampleLabel)
105
+exportMethods(selectBestModel)
105 106
 exportMethods(subsetCeldaList)
106 107
 import(Rcpp)
107 108
 import(RcppEigen)
... ...
@@ -541,50 +541,97 @@ setMethod("subsetCeldaList",
541 541
 
542 542
 #' @title Select best chain within each combination of parameters
543 543
 #' @description Select the chain with the best log likelihood for each
544
-#'  combination of tested parameters from a `celdaList` object gererated by
545
-#'  `celdaGridSearch()`.
546
-#' @param celdaList Object of class `celdaList`. An object containing celda
547
-#'  models returned from `celdaGridSearch()`.
548
-#' @param asList `TRUE` or `FALSE`. Whether to return the best model as a
549
-#'  `celdaList` object or not. If `FALSE`, return the best model as a
550
-#'  corresponding `celda_C`, `celda_G` or `celda_CG` object.
551
-#' @return A new `celdaList` object containing one model with the best log
552
-#'  likelihood for each set of parameters. If only one set of parameters is in
553
-#'  the `celdaList`, the best model will be returned directly instead of a
554
-#'  `celdaList` object.
555
-#' @seealso `celdaGridSearch()` can run Celda with multiple parameters and
556
-#'  chains in parallel. `subsetCeldaList()` can subset the `celdaList` object.
544
+#'  combination of tested parameters from a \code{SCE} object gererated by
545
+#'  \link{celdaGridSearch} or from a \code{celdaList} object.
546
+#' @param x Object of class \linkS4class{SingleCellExperiment} or
547
+#'  \code{celdaList}. An object containing celda
548
+#'  models returned from \link{celdaGridSearch}.
549
+#' @param asList \code{TRUE} or \code{FALSE}. Whether to return the
550
+#'  best model as a
551
+#'  \code{celdaList} object or not. If \code{FALSE}, return the best model as a
552
+#'  corresponding celda model object.
553
+#' @param useAssay A string specifying which \code{assay}
554
+#'  slot to use if \code{x} is a
555
+#'  \linkS4class{SingleCellExperiment} object. Default "counts".
556
+#' @return One of
557
+#' \itemize{
558
+#'  \item A new \linkS4class{SingleCellExperiment} object containing
559
+#'  one model with the best log-likelihood for each set of parameters in
560
+#'  \code{metadata(x)}. If there is only one set of parameters,
561
+#'  a new \linkS4class{SingleCellExperiment} object
562
+#'  with the matching model stored in the
563
+#'  \link[S4Vectors]{metadata}
564
+#'  \code{"celda_parameters"} slot will be returned. Otherwise, a new
565
+#'  \linkS4class{SingleCellExperiment} object with the subset models stored
566
+#'  in the \link[S4Vectors]{metadata}
567
+#'  \code{"celda_grid_search"} slot will be returned.
568
+#'  \item A new \code{celdaList} object containing one model with the best
569
+#'  log-likelihood for each set of parameters. If only one set of parameters
570
+#'  is in the \code{celdaList}, the best model will be returned directly
571
+#'  instead of a \code{celdaList} object.}
572
+#' @seealso \link{celdaGridSearch} \link{subsetCeldaList}
573
+#' @export
574
+setGeneric("selectBestModel", function(x, ...) {
575
+    standardGeneric("selectBestModel")})
576
+
577
+
578
+#' @rdname selectBestModel
579
+#' @examples
580
+#' data(sceCeldaCGGridSearch)
581
+#' ## Returns same result as running celdaGridSearch with "bestOnly = TRUE"
582
+#' sce <- selectBestModel(sceCeldaCGGridSearch)
583
+#' @importFrom data.table as.data.table
584
+#' @export
585
+setMethod("selectBestModel", signature(x = "SingleCellExperiment"),
586
+    function(x, asList = FALSE, useAssay = "counts") {
587
+        logLikelihood <- NULL
588
+        group <- setdiff(colnames(runParams(x)),
589
+            c("index", "chain", "logLikelihood", "mean_perplexity", "seed"))
590
+        dt <- data.table::as.data.table(runParams(celdaList))
591
+        newRunParams <- as.data.frame(dt[, .SD[which.max(logLikelihood)],
592
+            by = group])
593
+        newRunParams <- newRunParams[, colnames(runParams(celdaList))]
594
+
595
+        ix <- match(newRunParams$index, runParams(celdaList)$index)
596
+        if (nrow(newRunParams) == 1 & !asList) {
597
+            x <- celdatosce(resList(x)[[ix]],
598
+                SummarizedExperiment::assay(x, i = useAssay))
599
+        } else {
600
+            x@metadata$celda_grid_search@runParams <-
601
+                as.data.frame(newRunParams)
602
+            x@metadata$celda_grid_search@resList <- resList(x)[ix]
603
+        }
604
+        return(x)
605
+    }
606
+)
607
+
608
+
557 609
 #' @examples
558 610
 #' data(celdaCGGridSearchRes)
559 611
 #' ## Returns same result as running celdaGridSearch with "bestOnly = TRUE"
560 612
 #' cgsBest <- selectBestModel(celdaCGGridSearchRes)
561 613
 #' @importFrom data.table as.data.table
562 614
 #' @export
563
-selectBestModel <- function(celdaList, asList = FALSE) {
564
-  if (!methods::is(celdaList, "celdaList")) {
565
-    stop("celdaList parameter was not of class celdaList.")
566
-  }
615
+setMethod("selectBestModel", signature(x = "celdaList"),
616
+    function(x, asList = FALSE) {
617
+        logLikelihood <- NULL
618
+        group <- setdiff(colnames(runParams(x)),
619
+            c("index", "chain", "logLikelihood", "mean_perplexity", "seed"))
620
+        dt <- data.table::as.data.table(runParams(x))
621
+        newRunParams <- as.data.frame(dt[, .SD[which.max(logLikelihood)],
622
+            by = group])
623
+        newRunParams <- newRunParams[, colnames(runParams(x))]
567 624
 
568
-  logLikelihood <- NULL
569
-  group <- setdiff(
570
-    colnames(runParams(celdaList)),
571
-    c("index", "chain", "logLikelihood", "mean_perplexity", "seed")
572
-  )
573
-  dt <- data.table::as.data.table(runParams(celdaList))
574
-  newRunParams <- as.data.frame(dt[, .SD[which.max(logLikelihood)],
575
-    by = group
576
-  ])
577
-  newRunParams <- newRunParams[, colnames(runParams(celdaList))]
578
-
579
-  ix <- match(newRunParams$index, runParams(celdaList)$index)
580
-  if (nrow(newRunParams) == 1 & !asList) {
581
-    return(resList(celdaList)[[ix]])
582
-  } else {
583
-    celdaList@runParams <- as.data.frame(newRunParams)
584
-    celdaList@resList <- resList(celdaList)[ix]
585
-    return(celdaList)
586
-  }
587
-}
625
+        ix <- match(newRunParams$index, runParams(x)$index)
626
+        if (nrow(newRunParams) == 1 & !asList) {
627
+            return(resList(x)[[ix]])
628
+        } else {
629
+            x@runParams <- as.data.frame(newRunParams)
630
+            x@resList <- resList(x)[ix]
631
+            return(x)
632
+        }
633
+    }
634
+)
588 635
 
589 636
 
590 637
 .createSCEceldaGridSearch <- function(celdaList,
... ...
@@ -2,35 +2,55 @@
2 2
 % Please edit documentation in R/celdaGridSearch.R
3 3
 \name{selectBestModel}
4 4
 \alias{selectBestModel}
5
+\alias{selectBestModel,SingleCellExperiment-method}
5 6
 \title{Select best chain within each combination of parameters}
6 7
 \usage{
7
-selectBestModel(celdaList, asList = FALSE)
8
+selectBestModel(x, ...)
9
+
10
+\S4method{selectBestModel}{SingleCellExperiment}(x, asList = FALSE, useAssay = "counts")
8 11
 }
9 12
 \arguments{
10
-\item{celdaList}{Object of class `celdaList`. An object containing celda
11
-models returned from `celdaGridSearch()`.}
13
+\item{x}{Object of class \linkS4class{SingleCellExperiment} or
14
+\code{celdaList}. An object containing celda
15
+models returned from \link{celdaGridSearch}.}
16
+
17
+\item{asList}{\code{TRUE} or \code{FALSE}. Whether to return the
18
+best model as a
19
+\code{celdaList} object or not. If \code{FALSE}, return the best model as a
20
+corresponding celda model object.}
12 21
 
13
-\item{asList}{`TRUE` or `FALSE`. Whether to return the best model as a
14
-`celdaList` object or not. If `FALSE`, return the best model as a
15
-corresponding `celda_C`, `celda_G` or `celda_CG` object.}
22
+\item{useAssay}{A string specifying which \code{assay}
23
+slot to use if \code{x} is a
24
+\linkS4class{SingleCellExperiment} object. Default "counts".}
16 25
 }
17 26
 \value{
18
-A new `celdaList` object containing one model with the best log
19
- likelihood for each set of parameters. If only one set of parameters is in
20
- the `celdaList`, the best model will be returned directly instead of a
21
- `celdaList` object.
27
+One of
28
+\itemize{
29
+ \item A new \linkS4class{SingleCellExperiment} object containing
30
+ one model with the best log-likelihood for each set of parameters in
31
+ \code{metadata(x)}. If there is only one set of parameters,
32
+ a new \linkS4class{SingleCellExperiment} object
33
+ with the matching model stored in the
34
+ \link[S4Vectors]{metadata}
35
+ \code{"celda_parameters"} slot will be returned. Otherwise, a new
36
+ \linkS4class{SingleCellExperiment} object with the subset models stored
37
+ in the \link[S4Vectors]{metadata}
38
+ \code{"celda_grid_search"} slot will be returned.
39
+ \item A new \code{celdaList} object containing one model with the best
40
+ log-likelihood for each set of parameters. If only one set of parameters
41
+ is in the \code{celdaList}, the best model will be returned directly
42
+ instead of a \code{celdaList} object.}
22 43
 }
23 44
 \description{
24 45
 Select the chain with the best log likelihood for each
25
- combination of tested parameters from a `celdaList` object gererated by
26
- `celdaGridSearch()`.
46
+ combination of tested parameters from a \code{SCE} object gererated by
47
+ \link{celdaGridSearch} or from a \code{celdaList} object.
27 48
 }
28 49
 \examples{
29
-data(celdaCGGridSearchRes)
50
+data(sceCeldaCGGridSearch)
30 51
 ## Returns same result as running celdaGridSearch with "bestOnly = TRUE"
31
-cgsBest <- selectBestModel(celdaCGGridSearchRes)
52
+sce <- selectBestModel(sceCeldaCGGridSearch)
32 53
 }
33 54
 \seealso{
34
-`celdaGridSearch()` can run Celda with multiple parameters and
35
- chains in parallel. `subsetCeldaList()` can subset the `celdaList` object.
55
+\link{celdaGridSearch} \link{subsetCeldaList}
36 56
 }