Browse code

Add .selectSCEMatrix() and .manageCellVar; together with refining batch correction functions

Yichen Wang authored on 11/08/2022 16:49:05
Showing 31 changed files

... ...
@@ -75,35 +75,21 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
75 75
                     nIterations = 1000, numThreads = 1, seed = NULL){
76 76
   params <- as.list(environment())
77 77
   params$inSCE <- NULL
78
-  # input class check
79
-  if (!inherits(inSCE, "SingleCellExperiment")){
80
-    stop("Please use a SingleCellExperiment object")
81
-  }
82
-  # matrix specification check
83
-  # When useReducedDim, never useAssay;
84
-  # when useAltExp, useAssay/reducedDim from there
85
-  if (is.null(useAssay) && is.null(useReducedDim)) {
86
-    stop("`useAssay` and `useReducedDim` cannot be NULL at the same time.")
87
-  }
78
+  # Note: useMat = list(useAssay = useAssay, ...)
79
+  # `.selectSCEMatrix()` does the check and corrects the conditions
80
+  # When using useReducedDim, useAssay will be ignored
81
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay,
82
+                             useReducedDim = useReducedDim,
83
+                             useAltExp = useAltExp,
84
+                             returnMatrix = FALSE)$names
85
+  params$useAssay <- useMat$useAssay
86
+  useAssay <- useMat$useAssay
87
+
88 88
   if (!is.null(useAltExp)) {
89
-    if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) {
90
-      stop("Specified `useAltExp` not found.")
91
-    }
92 89
     sce <- SingleCellExperiment::altExp(inSCE, useAltExp)
93 90
   } else {
94 91
     sce <- inSCE
95 92
   }
96
-  if (!is.null(useReducedDim)) {
97
-    useAssay <- NULL
98
-    if (!useReducedDim %in% SingleCellExperiment::reducedDimNames(sce)) {
99
-      stop("Specified `useReducedDim` not found.")
100
-    }
101
-  } else {
102
-    # In this case, there is definitely `useAssay` specified
103
-    if (!useAssay %in% SummarizedExperiment::assayNames(sce)) {
104
-      stop("Specified `useAssay` not found.")
105
-    }
106
-  }
107 93
 
108 94
   if (!is.null(useAssay)) {
109 95
     if (isTRUE(logNorm)) {
... ...
@@ -119,9 +105,9 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
119 105
       if (!is.null(nTop) && nTop < nrow(sce)) {
120 106
         suppressMessages({
121 107
           sce <- runFeatureSelection(sce, useAssay, method = "modelGeneVar")
108
+          sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = nTop,
109
+                           featureSubsetName = "tsneHVG")
122 110
         })
123
-        sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = nTop,
124
-                         featureSubsetName = "tsneHVG")
125 111
         sce <- subsetSCERows(sce, rowData = "tsneHVG", returnAsAltExp = FALSE)
126 112
       }
127 113
     }
... ...
@@ -1,39 +1,39 @@
1 1
 #' Run UMAP embedding with scater method
2
-#' @description Uniform Manifold Approximation and Projection (UMAP) algorithm 
3
-#' is commonly for 2D visualization of single-cell data. This function wraps the 
4
-#' scater \code{\link[scater]{calculateUMAP}} function. 
5
-#' 
2
+#' @description Uniform Manifold Approximation and Projection (UMAP) algorithm
3
+#' is commonly for 2D visualization of single-cell data. This function wraps the
4
+#' scater \code{\link[scater]{calculateUMAP}} function.
5
+#'
6 6
 #' With this funciton, users can create UMAP embedding directly from raw count
7
-#' matrix, with necessary preprocessing including normalization, scaling, 
8
-#' dimension reduction all automated. Yet we still recommend having the PCA as 
9
-#' input, so that the result can match with the clustering based on the same 
7
+#' matrix, with necessary preprocessing including normalization, scaling,
8
+#' dimension reduction all automated. Yet we still recommend having the PCA as
9
+#' input, so that the result can match with the clustering based on the same
10 10
 #' input PCA.
11 11
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
12 12
 #' @param useAssay Assay to use for UMAP computation. If \code{useAltExp} is
13 13
 #' specified, \code{useAssay} has to exist in
14
-#' \code{assays(altExp(inSCE, useAltExp))}. Ignored when using 
14
+#' \code{assays(altExp(inSCE, useAltExp))}. Ignored when using
15 15
 #' \code{useReducedDim}. Default \code{"logcounts"}.
16 16
 #' @param useReducedDim The low dimension representation to use for UMAP
17
-#' computation. If \code{useAltExp} is specified, \code{useReducedDim} has to 
17
+#' computation. If \code{useAltExp} is specified, \code{useReducedDim} has to
18 18
 #' exist in \code{reducedDims(altExp(inSCE, useAltExp))}. Default \code{NULL}.
19 19
 #' @param useAltExp The subset to use for UMAP computation, usually for the
20 20
 #' selected variable features. Default \code{NULL}.
21 21
 #' @param sample Character vector. Indicates which sample each cell belongs to.
22
-#' If given a single character, will take the annotation from \code{colData}. 
22
+#' If given a single character, will take the annotation from \code{colData}.
23 23
 #' Default \code{NULL}.
24 24
 #' @param reducedDimName A name to store the results of the UMAP embedding
25 25
 #' coordinates obtained from this method. Default \code{"UMAP"}.
26 26
 #' @param logNorm Whether the counts will need to be log-normalized prior to
27 27
 #' generating the UMAP via \code{\link{scaterlogNormCounts}}. Ignored when using
28 28
 #' \code{useReducedDim}. Default \code{FALSE}.
29
-#' @param useFeatureSubset Subset of feature to use for dimension reduction. A 
29
+#' @param useFeatureSubset Subset of feature to use for dimension reduction. A
30 30
 #' character string indicating a \code{rowData} variable that stores the logical
31
-#' vector of HVG selection, or a vector that can subset the rows of 
31
+#' vector of HVG selection, or a vector that can subset the rows of
32 32
 #' \code{inSCE}. Default \code{NULL}.
33 33
 #' @param nTop Automatically detect this number of variable features to use for
34
-#' dimension reduction. Ignored when using \code{useReducedDim} or using 
34
+#' dimension reduction. Ignored when using \code{useReducedDim} or using
35 35
 #' \code{useFeatureSubset}. Default \code{2000}.
36
-#' @param scale Whether \code{useAssay} matrix will need to be standardized. 
36
+#' @param scale Whether \code{useAssay} matrix will need to be standardized.
37 37
 #' Default \code{TRUE}.
38 38
 #' @param pca Logical. Whether to perform dimension reduction with PCA before
39 39
 #' UMAP. Ignored when using \code{useReducedDim}. Default \code{TRUE}.
... ...
@@ -53,12 +53,12 @@
53 53
 #' result on a more even dispersal of points. Default \code{0.01}. See
54 54
 #' \code{\link[scater]{calculateUMAP}} for more information.
55 55
 #' @param spread The effective scale of embedded points. In combination with
56
-#' \code{minDist}, this determines how clustered/clumped the embedded points 
57
-#' are. Default \code{1}. See \code{\link[scater]{calculateUMAP}} for more 
56
+#' \code{minDist}, this determines how clustered/clumped the embedded points
57
+#' are. Default \code{1}. See \code{\link[scater]{calculateUMAP}} for more
58 58
 #' information.
59
-#' @param seed Random seed for reproducibility of UMAP results. 
59
+#' @param seed Random seed for reproducibility of UMAP results.
60 60
 #' Default \code{NULL} will use global seed in use by the R environment.
61
-#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether 
61
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether
62 62
 #' the PCA should be parallelized.
63 63
 #' @return A \linkS4class{SingleCellExperiment} object with UMAP computation
64 64
 #' updated in \code{reducedDim(inSCE, reducedDimName)}.
... ...
@@ -73,67 +73,35 @@
73 73
 #' # Run from PCA
74 74
 #' sce <- scaterlogNormCounts(sce, "logcounts")
75 75
 #' sce <- runModelGeneVar(sce)
76
-#' sce <- scaterPCA(sce, useAssay = "logcounts", 
76
+#' sce <- scaterPCA(sce, useAssay = "logcounts",
77 77
 #'                  useFeatureSubset = "HVG_modelGeneVar2000", scale = TRUE)
78 78
 #' sce <- getUMAP(sce, useReducedDim = "PCA")
79 79
 #' }
80 80
 #' @importFrom S4Vectors metadata<-
81
-getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, 
82
-                    useAltExp = NULL, sample = NULL, reducedDimName = "UMAP", 
83
-                    logNorm = FALSE, useFeatureSubset = NULL, nTop = 2000, 
84
-                    scale = TRUE, pca = TRUE, initialDims = 25, nNeighbors = 30, 
85
-                    nIterations = 200, alpha = 1, minDist = 0.01, spread = 1, 
81
+getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
82
+                    useAltExp = NULL, sample = NULL, reducedDimName = "UMAP",
83
+                    logNorm = FALSE, useFeatureSubset = NULL, nTop = 2000,
84
+                    scale = TRUE, pca = TRUE, initialDims = 25, nNeighbors = 30,
85
+                    nIterations = 200, alpha = 1, minDist = 0.01, spread = 1,
86 86
                     seed = NULL, BPPARAM = BiocParallel::SerialParam()) {
87 87
   params <- as.list(environment())
88 88
   params$inSCE <- NULL
89 89
   params$BPPARAM <- NULL
90
-  # input class check
91
-  if (!inherits(inSCE, "SingleCellExperiment")){
92
-    stop("Please use a SingleCellExperiment object")
93
-  }
94
-  # matrix specification check
95
-  # When useReducedDim, never useAssay; 
96
-  # when useAltExp, useAssay/reducedDim from there
97
-  if (is.null(useAssay) && is.null(useReducedDim)) {
98
-    stop("`useAssay` and `useReducedDim` cannot be NULL at the same time.")
99
-  } 
100
-  if (!is.null(useAltExp)) {
101
-    if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) {
102
-      stop("Specified `useAltExp` not found.")
103
-    }
104
-    sce <- SingleCellExperiment::altExp(inSCE, useAltExp)
105
-  } else {
106
-    sce <- inSCE
107
-  }
108
-  if (!is.null(useReducedDim)) {
109
-    useAssay <- NULL
110
-    if (!useReducedDim %in% SingleCellExperiment::reducedDimNames(sce)) {
111
-      stop("Specified `useReducedDim` not found.")
112
-    }
113
-  } else {
114
-    # In this case, there is definitely `useAssay` specified
115
-    if (!useAssay %in% SummarizedExperiment::assayNames(sce)) {
116
-      stop("Specified `useAssay` not found.")
117
-    }
118
-  }
119
-  # Check sample variable
120
-  if(!is.null(sample)) {
121
-    if (is.character(sample) && length(sample) == 1) {
122
-      if (!sample %in% names(SummarizedExperiment::colData(inSCE))) {
123
-        stop("Given sample annotation '", sample, "' not found.")
124
-      }
125
-      sample <- SummarizedExperiment::colData(inSCE)[[sample]]
126
-    } else if (length(sample) > 1) {
127
-      if(length(sample) != ncol(inSCE)) {
128
-        stop("'sample' must be the same length as the number of columns in ", 
129
-             "'inSCE'")
130
-      }
131
-    }
132
-  } else {
133
-    sample = rep(1, ncol(inSCE))
134
-  }
90
+  # Note: useMat = list(useAssay = useAssay, ...)
91
+  # `.selectSCEMatrix()` does the check and corrects the conditions
92
+  # When using useReducedDim, useAssay will be ignored
93
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay,
94
+                             useReducedDim = useReducedDim,
95
+                             useAltExp = useAltExp,
96
+                             returnMatrix = FALSE)$names
97
+  params$useAssay <- useMat$useAssay
98
+  useAssay <- useMat$useAssay
99
+  if (!is.null(useAltExp)) sce <- SingleCellExperiment::altExp(inSCE, useAltExp)
100
+  else sce <- inSCE
101
+  sample <- .manageCellVar(inSCE, sample)
102
+  if (is.null(sample)) sample <- rep(1, ncol(inSCE))
135 103
   samples <- unique(sample)
136
-  umapDims = matrix(nrow = ncol(inSCE), ncol = 2)
104
+  umapDims <- matrix(nrow = ncol(inSCE), ncol = 2)
137 105
   # Start looping for each sample
138 106
   for (i in seq_along(samples)){
139 107
     sceSampleInd <- sample == samples[i]
... ...
@@ -146,13 +114,13 @@ getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
146 114
         useAssayTemp = "ScaterLogNormCounts"
147 115
       }
148 116
     }
149
-    subset_row <- .parseUseFeatureSubset(inSCE, useFeatureSubset, 
117
+    subset_row <- .parseUseFeatureSubset(inSCE, useFeatureSubset,
150 118
                                          altExpObj = sce, returnType = "logic")
151
-    # initialDims passed to `pca` of scran::calculateUMAP. 
119
+    # initialDims passed to `pca` of scran::calculateUMAP.
152 120
     if (!isTRUE(pca) & !is.null(useAssay)) {
153 121
       initialDims <- NULL
154 122
     }
155
-    
123
+
156 124
     nNeighbors <- min(ncol(sceSample), nNeighbors)
157 125
     message(paste0(date(), " ... Computing Scater UMAP for sample '",
158 126
                    samples[i], "'."))
... ...
@@ -163,8 +131,8 @@ getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
163 131
                                        learning_rate = alpha,
164 132
                                        min_dist = minDist, spread = spread,
165 133
                                        n_sgd_threads = 1, pca = initialDims,
166
-                                       n_epochs = nIterations, 
167
-                                       subset_row = subset_row, ntop = nTop, 
134
+                                       n_epochs = nIterations,
135
+                                       subset_row = subset_row, ntop = nTop,
168 136
                                        BPPARAM = BPPARAM)
169 137
     })
170 138
     if (is.null(rownames(sceSample))) {
... ...
@@ -55,9 +55,8 @@
55 55
                 "matType. Force using user specification.")
56 56
       }
57 57
       matType <- ifelse(is.null(matType), bcInfo$matType, matType)
58
-      batch <- ifelse(is.null(batch), bcInfo$batch, batch)
58
+      if (is.null(batch)) batch <- bcInfo$batch
59 59
       if (is.null(condition)) condition <- bcInfo$condition
60
-      #condition <- ifelse(is.null(condition), bcInfo$condition, condition)
61 60
     }
62 61
   }
63 62
   return(list(origAssay = origAssay,
... ...
@@ -94,7 +93,7 @@
94 93
 #' @return An object of class \code{"gtable"}, combining four \code{ggplot}s.
95 94
 #' @examples
96 95
 #' data("sceBatches")
97
-#' sceBatches <- scaterlogNormCounts(sceBatches, "logcounts")
96
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
98 97
 #' sceBatches <- runLimmaBC(sceBatches)
99 98
 #' plotBatchCorrCompare(sceBatches, "LIMMA", condition = "cell_type")
100 99
 #' @export
... ...
@@ -232,59 +231,33 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL,
232 231
 #'                   condition = "cell_type")
233 232
 plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL,
234 233
                               useAltExp = NULL, batch = 'batch',
235
-                              condition=NULL, title = NULL) {
236
-  if(!inherits(inSCE, 'SingleCellExperiment')){
237
-    stop("'inSCE' must inherit from 'SingleCellExperiment'.")
238
-  }
239
-  if(is.null(useAssay) + is.null(useReddim) + is.null(useAltExp) != 2){
240
-    stop("One and only one of `useAssay`, `useReddim`, ",
241
-         "`usAltExp` has to be specified.")
242
-  }
243
-  if(!is.null(useAssay)){
244
-    if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)){
245
-      stop("'useAssay' not found in 'inSCE'.")
246
-    }
247
-    mat <- SummarizedExperiment::assay(inSCE, useAssay)
248
-  }
249
-  if(!is.null(useReddim)){
250
-    if(!useReddim %in% SingleCellExperiment::reducedDimNames(inSCE)){
251
-      stop("'useReddim not found in 'inSCE'.")
252
-    }
253
-    mat <- t(SingleCellExperiment::reducedDim(inSCE, useReddim))
254
-  }
255
-  if(!is.null(useAltExp)){
256
-    if(!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)){
257
-      stop("'useAltExp not found in 'inSCE'.")
258
-    }
259
-    ae <- SingleCellExperiment::altExp(inSCE, useAltExp)
260
-    mat <- SummarizedExperiment::assay(ae)
261
-  }
234
+                              condition = NULL, title = NULL) {
235
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay,
236
+                             useReducedDim = useReddim, useAltExp = useAltExp,
237
+                             returnMatrix = TRUE, cellAsCol = TRUE)
238
+  mat <- useMat$mat
262 239
   if(is.null(batch)){
263 240
     stop("Batch annotation has to be given.")
264
-  } else{
265
-    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
266
-      stop("'batch' not found in 'inSCE'.")
267
-    }
268 241
   }
242
+  batchCol <- .manageCellVar(inSCE, var = batch, as.factor = TRUE)
269 243
   if(!inherits(mat, 'matrix')){
270 244
     mat <- as.matrix(mat)
271 245
   }
272
-  batchCol <- SummarizedExperiment::colData(inSCE)[, batch]
273
-  nlb <- nlevels(as.factor(batchCol))
246
+  nlb <- nlevels(batchCol)
274 247
   if (nlb <= 1){
275 248
     stop("No more than one batch found in specified annotation")
276 249
   } else {
277
-    batchMod <- stats::model.matrix(~as.factor(batchCol))
250
+    batchMod <- stats::model.matrix(~batchCol)
278 251
   }
279 252
   if (is.null(condition)){
280 253
     condMod <- matrix(rep(1, ncol(mat)), ncol = 1)
281 254
   } else {
282
-    condCol <- SingleCellExperiment::colData(inSCE)[, condition]
283
-    nlc <- nlevels(as.factor(condCol))
255
+    condCol <- .manageCellVar(inSCE, var = condition, as.factor = TRUE)
256
+    nlc <- nlevels(condCol)
284 257
     if (nlc <= 1){
285 258
       condMod <- matrix(rep(1, ncol(mat)), ncol = 1)
286 259
     } else {
287
-      condMod <- stats::model.matrix(~as.factor(condCol))
260
+      condMod <- stats::model.matrix(~condCol)
288 261
     }
289 262
   }
290 263
   mod <- cbind(condMod, batchMod[, -1])
... ...
@@ -3,12 +3,12 @@
3 3
 #' BBKNN, an extremely fast graph-based data integration algorithm. It modifies
4 4
 #' the neighbourhood construction step to produce a graph that is balanced
5 5
 #' across all batches of the data.
6
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
6
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
7 7
 #' @param useAssay A single character indicating the name of the assay requiring
8 8
 #' batch correction. Default \code{"logcounts"}.
9
-#' @param batch A single character indicating a field in
10
-#' \code{\link{colData}} that annotates the batches.
11
-#' Default \code{"batch"}.
9
+#' @param batch A single character indicating a field in \code{colData} that
10
+#' annotates the batches of each cell; or a vector/factor with the same length
11
+#' as the number of cells. Default \code{"batch"}.
12 12
 #' @param reducedDimName A single character. The name for the corrected
13 13
 #' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}.
14 14
 #' Default \code{"BBKNN"}.
... ...
@@ -24,17 +24,12 @@
24 24
 #' @examples
25 25
 #' \dontrun{
26 26
 #' data('sceBatches', package = 'singleCellTK')
27
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1)
28
-#' sceBatches.small <- sample(sceBatches, 20)
29
-#' sceCorr <- runBBKNN(sceBatches.small, useAssay = "logcounts",
30
-#'                     nComponents = 10)
27
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
28
+#' sceBatches <- runBBKNN(sceBatches, useAssay = "logcounts",
29
+#'                        nComponents = 10)
31 30
 #' }
32 31
 runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch',
33 32
                     reducedDimName = 'BBKNN', nComponents = 50L){
34
-  ## Input check
35
-  if(!inherits(inSCE, "SingleCellExperiment")){
36
-    stop("\"inSCE\" should be a SingleCellExperiment Object.")
37
-  }
38 33
   if(!reticulate::py_module_available(module = "bbknn")){
39 34
     warning("Cannot find python module 'bbknn', please install Conda and",
40 35
             " run sctkPythonInstallConda() or run ",
... ...
@@ -48,15 +43,16 @@ runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch',
48 43
             " to select the correct Python environment.")
49 44
     return(inSCE)
50 45
   }
51
-  if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
52
-    stop(paste("\"batch\" name:", batch, "not found"))
53
-  }
46
+  useAssay <- .selectSCEMatrix(inSCE, useAssay = useAssay,
47
+                               returnMatrix = FALSE)$names$useAssay
48
+  batchVec <- .manageCellVar(inSCE, batch, as.factor = FALSE)
54 49
   reducedDimName <- gsub(' ', '_', reducedDimName)
55 50
   nComponents <- as.integer(nComponents)
56 51
   ## Run algorithm
57 52
   adata <- .sce2adata(inSCE, useAssay = useAssay)
53
+  adata$obs[["batch"]] <- batchVec
58 54
   sc$tl$pca(adata, n_comps = nComponents)
59
-  bbknn$bbknn(adata, batch_key = batch, n_pcs = nComponents)
55
+  bbknn$bbknn(adata, batch_key = "batch", n_pcs = nComponents)
60 56
   sc$tl$umap(adata, n_components = nComponents)
61 57
   bbknnUmap <- adata$obsm[["X_umap"]]
62 58
   SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- bbknnUmap
... ...
@@ -84,7 +80,7 @@ runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch',
84 80
 #' informative covariates could still be useful. If the cell types are unknown
85 81
 #' and are expected to be unbalanced, it is recommended to set \code{useSVA}
86 82
 #' to \code{TRUE}.
87
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
83
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
88 84
 #' @param useAssay A single character indicating the name of the assay requiring
89 85
 #' batch correction. Default \code{"counts"}.
90 86
 #' @param batch A single character indicating a field in
... ...
@@ -198,69 +194,76 @@ runComBatSeq <- function(inSCE, useAssay = "counts", batch = 'batch',
198 194
 #'
199 195
 #' fastMNN is a variant of the classic MNN method, modified for speed and more
200 196
 #' robust performance. For introduction of MNN, see \code{\link{runMNNCorrect}}.
201
-#' @param inSCE  inherited object. Required.
197
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
202 198
 #' @param useAssay A single character indicating the name of the assay requiring
203
-#' batch correction. Default \code{"logcounts"}. Alternatively, see
204
-#' \code{pcInput} parameter.
205
-#' @param batch A single character indicating a field in
206
-#' \code{\link{colData}} that annotates the batches.
207
-#' Default \code{"batch"}.
199
+#' batch correction. Default \code{"logcounts"}.
200
+#' @param useReducedDim A single character indicating the dimension reduction
201
+#' used for batch correction. Will ignore \code{useAssay} when using.
202
+#' Default \code{NULL}.
203
+#' @param batch A single character indicating a field in \code{colData} that
204
+#' annotates the batches of each cell; or a vector/factor with the same length
205
+#' as the number of cells. Default \code{"batch"}.
208 206
 #' @param reducedDimName A single character. The name for the corrected
209
-#' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}.
210
-#' Default \code{"fastMNN"}.
211
-#' @param pcInput A logical scalar. Whether to use a low-dimension matrix for
212
-#' batch effect correction. If \code{TRUE}, \code{useAssay} will be searched
213
-#' from \code{reducedDimNames(inSCE)}. Default \code{FALSE}.
207
+#' low-dimensional representation. Default \code{"fastMNN"}.
208
+#' @param k An integer scalar specifying the number of nearest neighbors to
209
+#' consider when identifying MNNs. See "See Also". Default \code{20}.
210
+#' @param propK A numeric scalar in (0, 1) specifying the proportion of cells in
211
+#' each dataset to use for mutual nearest neighbor searching. See "See Also".
212
+#' Default \code{NULL}.
213
+#' @param ndist A numeric scalar specifying the threshold beyond which
214
+#' neighbours are to be ignored when computing correction vectors. See "See
215
+#' Also". Default \code{3}.
216
+#' @param minBatchSkip Numeric scalar specifying the minimum relative magnitude
217
+#' of the batch effect, below which no correction will be performed at a given
218
+#' merge step. See "See Also". Default \code{0}.
219
+#' @param cosNorm A logical scalar indicating whether cosine normalization
220
+#' should be performed on \code{useAssay} prior to PCA. See "See Also". Default
221
+#' \code{TRUE}.
222
+#' @param nComponents An integer scalar specifying the number of dimensions to
223
+#' produce. See "See Also". Default \code{50}.
224
+#' @param weights The weighting scheme to use. Passed to
225
+#' \code{\link[batchelor]{multiBatchPCA}}. Default \code{NULL}.
226
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether
227
+#' the SVD should be parallelized.
214 228
 #' @return The input \linkS4class{SingleCellExperiment} object with
215 229
 #' \code{reducedDim(inSCE, reducedDimName)} updated.
230
+#' @seealso \code{\link[batchelor]{fastMNN}} for using \code{useAssay}, and
231
+#' \code{\link[batchelor]{reducedMNN}} for using \code{useReducedDim}
216 232
 #' @export
217 233
 #' @references Lun ATL, et al., 2016
218 234
 #' @examples
219 235
 #' data('sceBatches', package = 'singleCellTK')
220
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1)
221
-#' sceCorr <- runFastMNN(sceBatches, useAssay = 'logcounts', pcInput = FALSE)
222
-runFastMNN <- function(inSCE, useAssay = "logcounts",
223
-                       reducedDimName = "fastMNN", batch = 'batch',
224
-                       pcInput = FALSE){
225
-  ## Input check
226
-  if(!inherits(inSCE, "SingleCellExperiment")){
227
-    stop("\"inSCE\" should be a SingleCellExperiment Object.")
228
-  }
229
-  if(isTRUE(pcInput)){
230
-    if(!(useAssay %in% SingleCellExperiment::reducedDimNames(inSCE))) {
231
-      stop(paste("\"useAssay\" (reducedDim) name: ", useAssay, " not found."))
232
-    }
233
-  } else {
234
-    if(!(useAssay %in% SummarizedExperiment::assayNames(inSCE))) {
235
-      stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
236
-    }
237
-  }
238
-
239
-  if(!(batch %in% names(SummarizedExperiment::colData(inSCE)))){
240
-    stop(paste("\"batch name:", batch, "not found."))
241
-  }
236
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
237
+#' sceCorr <- runFastMNN(sceBatches, useAssay = 'logcounts')
238
+runFastMNN <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
239
+                       batch = 'batch', reducedDimName = "fastMNN", k = 20,
240
+                       propK = NULL, ndist = 3, minBatchSkip = 0,
241
+                       cosNorm = TRUE, nComponents = 50, weights = NULL,
242
+                       BPPARAM = BiocParallel::SerialParam()){
243
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay,
244
+                             useReducedDim = useReducedDim,
245
+                             returnMatrix = TRUE)
246
+  mat <- useMat$mat
247
+  batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE)
242 248
   reducedDimName <- gsub(' ', '_', reducedDimName)
243 249
 
244
-  ## Run algorithm
245
-  batches <- list()
246
-  batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
247
-  batchFactor <- as.factor(batchCol)
248
-
249
-  if(pcInput){
250
-    mat <- SingleCellExperiment::reducedDim(inSCE, useAssay)
251
-    redMNN <- batchelor::reducedMNN(mat, batch = batchFactor)
250
+  if (!is.null(useReducedDim)) {
251
+    redMNN <- batchelor::reducedMNN(mat, batch = batchVec, BPPARAM = BPPARAM,
252
+                                    k = k, prop.k = propK, ndist = ndist,
253
+                                    min.batch.skip = minBatchSkip)
252 254
     newRedDim <- redMNN$corrected
253 255
   } else {
254
-    mat <- SummarizedExperiment::assay(inSCE, useAssay)
255
-    mnnSCE <- batchelor::fastMNN(mat, batch = batchFactor)
256
+    mnnSCE <- batchelor::fastMNN(mat, batch = batchVec, BPPARAM = BPPARAM, k = k,
257
+                                 prop.k = propK, ndist = ndist,
258
+                                 min.batch.skip = minBatchSkip,
259
+                                 cos.norm = cosNorm, d = nComponents,
260
+                                 weights = weights)
256 261
     newRedDim <- SingleCellExperiment::reducedDim(mnnSCE, 'corrected')
257 262
   }
258 263
   SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- newRedDim
259
-  S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <- list(useAssay = useAssay,
260
-                                                            origLogged = TRUE,
261
-                                                            method = "fastMNN",
262
-                                                            matType = "reducedDim",
263
-                                                            batch = batch)
264
+  S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <-
265
+    list(useAssay = useAssay, origLogged = TRUE, method = "fastMNN",
266
+         matType = "reducedDim", batch = batch)
264 267
   return(inSCE)
265 268
 }
266 269
 
... ...
@@ -268,15 +271,15 @@ runFastMNN <- function(inSCE, useAssay = "logcounts",
268 271
 #' @description Harmony is an algorithm that projects cells into a shared
269 272
 #' embedding in which cells group by cell type rather than dataset-specific
270 273
 #' conditions.
271
-#' @param inSCE Input \linkS4class{SingleCellExperiment} inherited object.
274
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
272 275
 #' @param useAssay A single character indicating the name of the assay requiring
273 276
 #' batch correction. Default \code{"logcounts"}.
274 277
 #' @param useReducedDim A single character indicating the name of the reducedDim
275 278
 #' used to be corrected. Specifying this will ignore \code{useAssay}. Default
276 279
 #' \code{NULL}.
277
-#' @param batch A single character indicating a field in
278
-#' \code{\link{colData}} that annotates the batches.
279
-#' Default \code{"batch"}.
280
+#' @param batch A single character indicating a field in \code{colData} that
281
+#' annotates the batches of each cell; or a vector/factor with the same length
282
+#' as the number of cells. Default \code{"batch"}.
280 283
 #' @param reducedDimName A single character. The name for the corrected
281 284
 #' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}.
282 285
 #' Default \code{"HARMONY"}.
... ...
@@ -307,7 +310,7 @@ runFastMNN <- function(inSCE, useAssay = "logcounts",
307 310
 #' @references Ilya Korsunsky, et al., 2019
308 311
 #' @examples
309 312
 #' data('sceBatches', package = 'singleCellTK')
310
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1)
313
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
311 314
 #' sceCorr <- runHarmony(sceBatches)
312 315
 runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
313 316
                        batch = "batch", reducedDimName = "HARMONY",
... ...
@@ -320,15 +323,15 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
320 323
          call. = FALSE)
321 324
   }
322 325
   ## Input check
323
-  matrixSelect <- .selectSCEMatrix(inSCE, useAssay, useReducedDim,
324
-                                   useAltExp = NULL, returnMatrix = TRUE)
325
-  batch <- .manageCellVar(inSCE, batch, as.factor = TRUE)
326
+  useMat <- .selectSCEMatrix(inSCE, useAssay, useReducedDim,
327
+                             useAltExp = NULL, returnMatrix = TRUE)
328
+  batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE)
326 329
   reducedDimName <- gsub(' ', '_', reducedDimName)
327 330
   ## Run algorithm
328 331
   message(Sys.Date(), " ... Running harmony batch correction")
329
-  mat <- matrixSelect$mat
330
-  do_pca <- ifelse(is.null(matrixSelect$names$useReducedDim), TRUE, FALSE)
331
-  if (!is.null(matrixSelect$names$useReducedDim)) {
332
+  mat <- useMat$mat
333
+  do_pca <- ifelse(is.null(useMat$names$useReducedDim), TRUE, FALSE)
334
+  if (!is.null(useMat$names$useReducedDim)) {
332 335
     if (nComponents > ncol(mat)) {
333 336
       warning("Specified number of components more than available, ",
334 337
               "using all (", ncol(mat), ") components.")
... ...
@@ -336,12 +339,15 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
336 339
     }
337 340
     mat <- mat[,seq(nComponents)]
338 341
   }
339
-  h <- harmony::HarmonyMatrix(data_mat = mat, meta_data = batch,
342
+  h <- harmony::HarmonyMatrix(data_mat = mat, meta_data = batchVec,
340 343
                               do_pca = do_pca, npcs = nComponents,
341 344
                               lambda = lambda, theta = theta,
342 345
                               sigma = sigma, max.iter.harmony = nIter,
343 346
                               verbose = verbose, return_object = FALSE, ...)
344 347
   SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- h
348
+  S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <-
349
+    list(useAssay = useAssay, origLogged = TRUE, method = "harmony",
350
+         matType = "reducedDim", batch = batch)
345 351
   return(inSCE)
346 352
 }
347 353
 
... ...
@@ -349,7 +355,7 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
349 355
 # #'
350 356
 # #' LIGER relies on integrative non-negative matrix factorization to identify
351 357
 # #' shared and dataset-specific factors.
352
-# #' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
358
+# #' @param Input \linkS4class{SingleCellExperiment} object
353 359
 # #' @param useAssay A single character indicating the name of the assay requiring
354 360
 # #' batch correction. Default \code{"logcounts"}.
355 361
 # #' @param batch A single character indicating a field in
... ...
@@ -427,12 +433,12 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
427 433
 #'
428 434
 #' Limma's batch effect removal function fits a linear model to the data, then
429 435
 #' removes the component due to the batch effects.
430
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
436
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
431 437
 #' @param useAssay A single character indicating the name of the assay requiring
432 438
 #' batch correction. Default \code{"logcounts"}.
433
-#' @param batch A single character indicating a field in
434
-#' \code{\link{colData}} that annotates the batches.
435
-#' Default \code{"batch"}.
439
+#' @param batch A single character indicating a field in \code{colData} that
440
+#' annotates the batches of each cell; or a vector/factor with the same length
441
+#' as the number of cells. Default \code{"batch"}.
436 442
 #' @param assayName A single characeter. The name for the corrected assay. Will
437 443
 #' be saved to \code{\link{assay}}. Default
438 444
 #' \code{"LIMMA"}.
... ...
@@ -442,30 +448,19 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
442 448
 #' @references Gordon K Smyth, et al., 2003
443 449
 #' @examples
444 450
 #' data('sceBatches', package = 'singleCellTK')
445
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1)
451
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
446 452
 #' sceCorr <- runLimmaBC(sceBatches)
447 453
 runLimmaBC <- function(inSCE, useAssay = "logcounts", assayName = "LIMMA",
448 454
                        batch = "batch") {
449 455
   ## Input check
450
-  if(!inherits(inSCE, "SingleCellExperiment")){
451
-    stop("\"inSCE\" should be a SingleCellExperiment Object.")
452
-  }
453
-  if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
454
-    stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
455
-  }
456
-  if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
457
-    stop(paste("\"batch\" name:", batch, "not found."))
458
-  }
459
-
456
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, returnMatrix = TRUE)
457
+  mat <- useMat$mat
458
+  batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE)
460 459
   assayName <- gsub(' ', '_', assayName)
461
-
462 460
   ## Run algorithm
463 461
   ## One more check for the batch names
464
-  batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
465
-  mat <- SummarizedExperiment::assay(inSCE, useAssay)
466
-  newMat <- limma::removeBatchEffect(mat, batch = batchCol)
462
+  newMat <- limma::removeBatchEffect(mat, batch = batchVec)
467 463
   expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <- newMat
468
-  #SummarizedExperiment::assay(inSCE, assayName) <- newMat
469 464
   S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- list(useAssay = useAssay,
470 465
                                                             origLogged = TRUE,
471 466
                                                             method = "Limma",
... ...
@@ -482,57 +477,61 @@ runLimmaBC <- function(inSCE, useAssay = "logcounts", assayName = "LIMMA",
482 477
 #' does so by identifying pairs of MNN in the high-dimensional log-expression
483 478
 #' space. For each MNN pair, a pairwise correction vector is computed by
484 479
 #' applying a Gaussian smoothing kernel with bandwidth `sigma`.
485
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
480
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
486 481
 #' @param useAssay A single character indicating the name of the assay requiring
487 482
 #' batch correction. Default \code{"logcounts"}.
488
-#' @param batch A single character indicating a field in
489
-#' \code{\link{colData}} that annotates the batches.
490
-#' Default \code{"batch"}.
491
-#' @param k An integer. Specifies the number of nearest neighbours to
492
-#' consider when defining MNN pairs. This should be interpreted as the minimum
493
-#' frequency of each cell type or state in each batch. Larger values will
494
-#' improve the precision of the correction by increasing the number of MNN
495
-#' pairs, at the cost of reducing accuracy by allowing MNN pairs to form between
496
-#' cells of different type. Default \code{20L}.
497
-#' @param sigma A Numeric scalar. Specifies how much information is
498
-#' shared between MNN pairs when computing the batch effect. Larger values will
499
-#' share more information, approaching a global correction for all cells in the
500
-#' same batch. Smaller values allow the correction to vary across cell types,
501
-#' which may be more accurate but comes at the cost of precision. Default
502
-#' \code{0.1}.
483
+#' @param batch A single character indicating a field in \code{colData} that
484
+#' annotates the batches of each cell; or a vector/factor with the same length
485
+#' as the number of cells. Default \code{"batch"}.
503 486
 #' @param assayName A single characeter. The name for the corrected assay. Will
504 487
 #' be saved to \code{\link{assay}}. Default
505 488
 #' \code{"MNN"}.
489
+#' @param k An integer scalar specifying the number of nearest neighbors to
490
+#' consider when identifying MNNs. See "See Also". Default \code{20}.
491
+#' @param propK A numeric scalar in (0, 1) specifying the proportion of cells in
492
+#' each dataset to use for mutual nearest neighbor searching. See "See Also".
493
+#' Default \code{NULL}.
494
+#' @param sigma A numeric scalar specifying the bandwidth of the Gaussian
495
+#' smoothing kernel used to compute the correction vector for each cell. See
496
+#' "See Also". Default \code{0.1}.
497
+#' @param cosNormIn A logical scalar indicating whether cosine normalization
498
+#' should be performed on the input data prior to calculating distances between
499
+#' cells. See "See Also". Default \code{TRUE}.
500
+#' @param cosNormOut A logical scalar indicating whether cosine normalization
501
+#' should be performed prior to computing corrected expression values. See "See
502
+#' Also". Default \code{TRUE}.
503
+#' @param varAdj A logical scalar indicating whether variance adjustment should
504
+#' be performed on the correction vectors. See "See Also". Default \code{TRUE}.
505
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether
506
+#' the PCA and nearest-neighbor searches should be parallelized.
506 507
 #' @return The input \linkS4class{SingleCellExperiment} object with
507 508
 #' \code{assay(inSCE, assayName)} updated.
509
+#' @seealso \code{\link[batchelor]{mnnCorrect}}
508 510
 #' @export
509
-#' @references Lun ATL, et al., 2016 & 2018
511
+#' @references Haghverdi L, Lun ATL, et. al., 2018
510 512
 #' @examples
511 513
 #' data('sceBatches', package = 'singleCellTK')
512
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1)
514
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
513 515
 #' sceCorr <- runMNNCorrect(sceBatches)
514 516
 runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch',
515
-                          assayName = 'MNN', k = 20L, sigma = 0.1){
517
+                          assayName = 'MNN', k = 20L, propK = NULL,
518
+                          sigma = 0.1, cosNormIn = TRUE, cosNormOut = TRUE,
519
+                          varAdj = TRUE, BPPARAM = BiocParallel::SerialParam()){
516 520
   ## Input check
517
-  if(!inherits(inSCE, "SingleCellExperiment")){
518
-    stop("\"inSCE\" should be a SingleCellExperiment Object.")
519
-  }
520
-  if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
521
-    stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
522
-  }
523
-  if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
524
-    stop(paste("\"batch name:", batch, "not found."))
525
-  }
521
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, returnMatrix = TRUE)
522
+  mat <- useMat$mat
523
+  batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE)
526 524
   assayName <- gsub(' ', '_', assayName)
527 525
   k <- as.integer(k)
528 526
 
529 527
   ## Run algorithm
530
-  batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
531
-  batchFactor <- as.factor(batchCol)
532
-  mnnSCE <- batchelor::mnnCorrect(inSCE, assay.type = useAssay,
533
-                                  batch = batchFactor, k = k, sigma = sigma)
534
-  corrected <- SummarizedExperiment::assay(mnnSCE, 'corrected')
535
-  expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <- corrected
528
+  corr.sce <- batchelor::mnnCorrect(mat, batch = batchVec, k = k,
529
+                                    prop.k = propK, sigma = sigma,
530
+                                    cos.norm.in = cosNormIn,
531
+                                    cos.norm.out = cosNormOut, var.adj = varAdj,
532
+                                    BPPARAM = BPPARAM)
533
+  expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <-
534
+    SummarizedExperiment::assay(corr.sce, "corrected")
536 535
   S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- list(useAssay = useAssay,
537 536
                                                             origLogged = TRUE,
538 537
                                                             method = "MNN",
... ...
@@ -547,22 +546,24 @@ runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch',
547 546
 #' SCANORAMA is analogous to computer vision algorithms for panorama stitching
548 547
 #' that identify images with overlapping content and merge these into a larger
549 548
 #' panorama.
550
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
549
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
551 550
 #' @param useAssay A single character indicating the name of the assay requiring
552 551
 #' batch correction. Scanorama requires a transformed normalized expression
553 552
 #' assay. Default \code{"logcounts"}.
554
-#' @param batch A single character indicating a field in
555
-#' \code{\link{colData}} that annotates the batches.
556
-#' Default \code{"batch"}.
553
+#' @param batch A single character indicating a field in \code{colData} that
554
+#' annotates the batches of each cell; or a vector/factor with the same length
555
+#' as the number of cells. Default \code{"batch"}.
556
+#' @param assayName A single characeter. The name for the corrected assay. Will
557
+#' be saved to \code{\link{assay}}. Default
558
+#' \code{"SCANORAMA"}.
557 559
 #' @param SIGMA A numeric scalar. Algorithmic parameter, correction smoothing
558 560
 #' parameter on Gaussian kernel. Default \code{15}.
559 561
 #' @param ALPHA A numeric scalar. Algorithmic parameter, alignment score
560 562
 #' minimum cutoff. Default \code{0.1}.
561 563
 #' @param KNN An integer. Algorithmic parameter, number of nearest neighbors to
562
-#' use for matching. Default \code{20L}.
563
-#' @param assayName A single characeter. The name for the corrected assay. Will
564
-#' be saved to \code{\link{assay}}. Default
565
-#' \code{"SCANORAMA"}.
564
+#' use for matching. Default \code{20}.
565
+#' @param approx Boolean. Use approximate nearest neighbors, greatly speeds up
566
+#' matching runtime. Default \code{TRUE}.
566 567
 #' @return The input \linkS4class{SingleCellExperiment} object with
567 568
 #' \code{assay(inSCE, assayName)} updated.
568 569
 #' @export
... ...
@@ -570,16 +571,13 @@ runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch',
570 571
 #' @examples
571 572
 #' \dontrun{
572 573
 #' data('sceBatches', package = 'singleCellTK')
573
-#' sceBatches <- scaterlogNormCounts(sceBatches)
574
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
574 575
 #' sceCorr <- runSCANORAMA(sceBatches, "ScaterLogNormCounts")
575 576
 #' }
576 577
 runSCANORAMA <- function(inSCE, useAssay = 'logcounts', batch = 'batch',
577
-                         SIGMA = 15, ALPHA = 0.1, KNN = 20L,
578
-                         assayName = 'SCANORAMA'){
578
+                         assayName = 'SCANORAMA', SIGMA = 15, ALPHA = 0.1,
579
+                         KNN = 20, approx = TRUE){
579 580
   ## Input check
580
-  if(!inherits(inSCE, "SingleCellExperiment")){
581
-    stop("\"inSCE\" should be a SingleCellExperiment Object.")
582
-  }
583 581
   if(!reticulate::py_module_available(module = "scanorama")){
584 582
     warning("Cannot find python module 'scanorama', please install Conda and",
585 583
             " run sctkPythonInstallConda() or run ",
... ...
@@ -593,31 +591,27 @@ runSCANORAMA <- function(inSCE, useAssay = 'logcounts', batch = 'batch',
593 591
             " to select the correct Python environment.")
594 592
     return(inSCE)
595 593
   }
596
-  if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
597
-    stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found"))
598
-  }
599
-  if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
600
-    stop(paste("\"batch\" name:", batch, "not found"))
601
-  }
594
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay)
595
+  batchVec <- .manageCellVar(inSCE, batch)
602 596
   assayName <- gsub(' ', '_', assayName)
603 597
   adata <- .sce2adata(inSCE, useAssay)
598
+  adata$obs[["batch"]] <- batchVec
604 599
   py <- reticulate::py
605 600
   py$adata <- adata
606
-  py$batch <- batch
607 601
   py$sigma <- SIGMA
608 602
   py$alpha <- ALPHA
609 603
   py$KNN <- as.integer(KNN)
610 604
   reticulate::py_run_string('
611 605
 import numpy as np
612 606
 import scanorama
613
-batches = list(set(adata.obs[batch]))
614
-adatas = [adata[adata.obs[batch] == b,] for b in batches]
615
-datasets_full = [a.X.toarray() for a in adatas]
607
+batches = list(set(adata.obs["batch"]))
608
+adatas = [adata[adata.obs["batch"] == b,] for b in batches]
609
+datasets_full = [a.X for a in adatas]
616 610
 genes_list = [a.var_names.to_list() for a in adatas]
617 611
 corrected, genes = scanorama.correct(datasets_full, genes_list, sigma=sigma,
618 612
                                      alpha=alpha, knn=KNN)
619 613
 corrected = [m.toarray() for m in corrected]
620
-cellOrders = [adata.obs[batch] == b for b in batches]
614
+cellOrders = [adata.obs["batch"] == b for b in batches]
621 615
 integrated = np.zeros(adata.shape)
622 616
 integrated[:,:] = np.NAN
623 617
 for i in range(len(batches)):
... ...
@@ -632,6 +626,9 @@ integrated = integrated[:, orderIdx]
632 626
   rownames(mat) <- rownames(inSCE)
633 627
   colnames(mat) <- colnames(inSCE)
634 628
   expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <- mat
629
+  S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <-
630
+    list(useAssay = useAssay,  origLogged = TRUE, method = "Scanorama",
631
+         matType = "assay", batch = batch)
635 632
   return(inSCE)
636 633
 }
637 634
 
... ...
@@ -640,12 +637,16 @@ integrated = integrated[:, orderIdx]
640 637
 #' The scMerge method leverages factor analysis, stably expressed genes (SEGs)
641 638
 #' and (pseudo-) replicates to remove unwanted variations and merge multiple
642 639
 #' scRNA-Seq data.
643
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
640
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
644 641
 #' @param useAssay A single character indicating the name of the assay requiring
645 642
 #' batch correction. Default \code{"logcounts"}.
646 643
 #' @param batch A single character indicating a field in
647 644
 #' \code{\link{colData}} that annotates the batches.
648 645
 #' Default \code{"batch"}.
646
+#' @param assayName A single characeter. The name for the corrected assay. Will
647
+#' be saved to \code{\link{assay}}. Default \code{"scMerge"}.
648
+#' @param hvgExprs A single characeter. The assay that to be used for highly
649
+#' variable genes identification. Default \code{"counts"}.
649 650
 #' @param kmeansK An integer vector. Indicating the kmeans' K-value for each
650 651
 #' batch (i.e. how many subclusters in each batch should exist), in order to
651 652
 #' construct pseudo-replicates. The length of code{kmeansK} needs to be the same
... ...
@@ -656,14 +657,12 @@ integrated = integrated[:, orderIdx]
656 657
 #' \code{'cell_type'}.
657 658
 #' @param seg A vector of gene names or indices that specifies SEG (Stably
658 659
 #' Expressed Genes) set as negative control. Pre-defined dataset with human and
659
-#' mouse SEG lists is available to user by running \code{data('SEG')}. Default
660
+#' mouse SEG lists is available with \code{\link[scMerge]{segList}} or
661
+#' \code{\link[scMerge]{segList_ensemblGeneID}}. Default
660 662
 #' \code{NULL}, and this value will be auto-detected by default with
661 663
 #' \code{\link[scMerge]{scSEGIndex}}.
662
-#' @param nCores An integer. The number of cores of processors to allocate for
663
-#' the task. Default \code{1L}.
664
-#' @param assayName A single characeter. The name for the corrected assay. Will
665
-#' be saved to \code{\link{assay}}. Default
666
-#' \code{"scMerge"}.
664
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether
665
+#' should be parallelized. Default \code{BiocParallel::SerialParam()}.
667 666
 #' @return The input \linkS4class{SingleCellExperiment} object with
668 667
 #' \code{assay(inSCE, assayName)} updated.
669 668
 #' @export
... ...
@@ -671,53 +670,33 @@ integrated = integrated[:, orderIdx]
671 670
 #' @examples
672 671
 #' data('sceBatches', package = 'singleCellTK')
673 672
 #' \dontrun{
674
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1)
673
+#' logcounts(sceBatches) <- log1p(counts(sceBatches))
675 674
 #' sceCorr <- runSCMerge(sceBatches)
676 675
 #' }
677 676
 runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch',
678
-                       assayName = "scMerge", seg = NULL, kmeansK = NULL,
679
-                       cellType = 'cell_type',
680
-                       nCores = 1L){
677
+                       assayName = "scMerge", hvgExprs = "counts", seg = NULL,
678
+                       kmeansK = NULL, cellType = NULL,
679
+                       BPPARAM = BiocParallel::SerialParam()){
681 680
   ## Input check
682
-  if(!inherits(inSCE, "SingleCellExperiment")){
683
-    stop("\"inSCE\" should be a SingleCellExperiment Object.")
684
-  }
685
-  if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
686
-    stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
687
-  }
688
-  if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
689
-    stop(paste("\"batch\" name:", batch, "not found"))
690
-  }
691
-  if(is.null(cellType) & is.null(kmeansK)){
692
-    stop("\"cellType\" and \"kmeansK\" cannot be NULL at the same time")
693
-  }
694
-  if(!is.null(cellType) &&
695
-     !cellType %in% names(SummarizedExperiment::colData(inSCE))){
696
-    # If NULL, scMerge still works
697
-    stop(paste("\"cellType\" name:", cellType, "not found"))
698
-  }
699
-
700
-  nCores <- min(as.integer(nCores), parallel::detectCores())
681
+  useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, returnMatrix = TRUE)
682
+  .selectSCEMatrix(inSCE, useAssay = hvgExprs, returnMatrix = FALSE)
683
+  mat <- useMat$mat
684
+  batchVec <- .manageCellVar(inSCE, batch)
685
+  cellTypeCol <- .manageCellVar(inSCE, cellType)
686
+  #if(is.null(cellType) & is.null(kmeansK)){
687
+  #  stop("\"cellType\" and \"kmeansK\" cannot be NULL at the same time")
688
+  #}
701 689
   assayName <- gsub(' ', '_', assayName)
702 690
 
703 691
   ## Run algorithm
692
+  uniqBatch <- unique(batch)
704 693
 
705
-  batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
706
-  uniqBatch <- unique(batchCol)
707
-
708
-  # Infer parameters
709
-  if(is.null(cellType)){
710
-    cellTypeCol <- NULL
711
-  } else {
712
-    cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]]
713
-  }
714 694
   ## kmeansK
715 695
   if(!is.null(cellType) && is.null(kmeansK)){
716 696
     # If kmeansK not given, detect by cell type.
717
-    cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]]
718 697
     kmeansK <- c()
719 698
     for (i in seq_along(uniqBatch)){
720
-      cellTypePerBatch <- cellTypeCol[batchCol == uniqBatch[i]]
699
+      cellTypePerBatch <- cellType[batchVec == uniqBatch[i]]
721 700
       kmeansK <- c(kmeansK, length(unique(cellTypePerBatch)))
722 701
     }
723 702
     cat("Detected kmeansK:\n")
... ...
@@ -725,37 +704,25 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch',
725 704
   }
726 705
   ## SEG
727 706
   if(is.null(seg)){
728
-    bpParam <- BiocParallel::MulticoreParam(workers = nCores)
729
-    seg <- scMerge::scSEGIndex(SummarizedExperiment::assay(inSCE, useAssay),
730
-                               cell_type = cellTypeCol,
731
-                               BPPARAM = bpParam)
732
-    ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[seq_len(1000)],])
707
+    seg <- scMerge::scSEGIndex(mat, cell_type = cellTypeCol, BPPARAM = BPPARAM)
708
+    ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[seq(1000)],])
733 709
   } else {
734 710
     ctl <- seg
735 711
   }
736 712
 
737
-  # scMerge automatically search for the column called "batch"...
738
-  colDataNames <- names(SummarizedExperiment::colData(inSCE))
739
-  names(SummarizedExperiment::colData(inSCE))[colDataNames == batch] <- 'batch'
740
-  bpParam <- BiocParallel::MulticoreParam(workers = nCores)
741 713
   inSCE <- scMerge::scMerge(sce_combine = inSCE, exprs = useAssay,
742
-                            hvg_exprs = useAssay,
714
+                            hvg_exprs = useAssay, batch_name = batch,
743 715
                             assay_name = assayName,
744 716
                             ctl = ctl, kmeansK = kmeansK,
745 717
                             #marker_list = topVarGenesPerBatch,
746 718
                             cell_type = cellTypeCol,
747
-                            BPPARAM = bpParam)
748
-  colDataNames <- names(SummarizedExperiment::colData(inSCE))
749
-  names(SummarizedExperiment::colData(inSCE))[colDataNames == 'batch'] <- batch
719
+                            BPPARAM = BPPARAM)
750 720
   # scMerge's function automatically returns the SCE object with information
751 721
   # completed, thus using this helper function to simply add the tag.
752 722
   inSCE <- expSetDataTag(inSCE, "batchCorrected", assayName)
753
-  S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- list(useAssay = useAssay,
754
-                                                            origLogged = TRUE,
755
-                                                            method = "scMerge",
756
-                                                            matType = "assay",
757
-                                                            batch = batch,
758
-                                                            condition = cellType)
723
+  S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <-
724
+    list(useAssay = useAssay, origLogged = TRUE, method = "scMerge",
725
+         matType = "assay", batch = batch, condition = cellType)
759 726
   return(inSCE)
760 727
 }
761 728
 
... ...
@@ -766,7 +733,7 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch',
766 733
 #' model accounts for zero inflation (dropouts), over-dispersion, and the count
767 734
 #' nature of the data. The model also accounts for the difference in library
768 735
 #' sizes and optionally for batch effects and/or other covariates.
769
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required.
736
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object
770 737
 #' @param useAssay A single character indicating the name of the assay requiring
771 738
 #' batch correction. Note that ZINBWaVE works for counts (integer) input rather
772 739
 #' than logcounts that other methods prefer. Default \code{"counts"}.
... ...
@@ -785,6 +752,8 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch',
785 752
 #' @param reducedDimName A single character. The name for the corrected
786 753
 #' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}.
787 754
 #' Default \code{"zinbwave"}.
755
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether
756
+#' should be parallelized. Default \code{BiocParallel::SerialParam()}.
788 757
 #' @return The input \linkS4class{SingleCellExperiment} object with
789 758
 #' \code{reducedDim(inSCE, reducedDimName)} updated.
790 759
 #' @export
... ...
@@ -796,7 +765,8 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch',
796 765
 #' }
797 766
 runZINBWaVE <- function(inSCE, useAssay = 'counts', batch = 'batch',
798 767
                         nHVG = 1000L, nComponents = 50L, epsilon = 1000,
799
-                        nIter = 10L, reducedDimName = 'zinbwave'){
768
+                        nIter = 10L, reducedDimName = 'zinbwave',
769
+                        BPPARAM = BiocParallel::SerialParam()){
800 770
   ## Input check
801 771
   if(!inherits(inSCE, "SingleCellExperiment")){
802 772
     stop("\"inSCE\" should be a SingleCellExperiment Object.")
... ...
@@ -819,16 +789,24 @@ runZINBWaVE <- function(inSCE, useAssay = 'counts', batch = 'batch',
819 789
     vars <- matrixStats::rowVars(logAssay)
820 790
     names(vars) <- rownames(inSCE)
821 791
     vars <- sort(vars, decreasing = TRUE)
822
-    tmpSCE <- inSCE[names(vars)[seq_len(nHVG)],]
792
+    hvgs <- names(vars)[seq_len(nHVG)]
793
+    #tmpSCE <- inSCE[names(vars)[seq_len(nHVG)],]
823 794
   } else {
824
-    tmpSCE <- inSCE
795
+    #tmpSCE <- inSCE
796
+    hvgs <- rownames(inSCE)
825 797
   }
826
-  epsilon <- min(nrow(tmpSCE), epsilon)
827
-  tmpSCE <- zinbwave::zinbwave(tmpSCE, K = nComponents, epsilon = epsilon,
828
-                               which_assay = useAssay,
798
+  epsilon <- min(nrow(inSCE), epsilon)
799
+  inSCE <- zinbwave::zinbwave(inSCE, K = nComponents, epsilon = epsilon,
800
+                               which_assay = useAssay, which_genes = hvgs,
829 801
                                X = paste('~', batch, sep = ''),
830
-                               maxiter.optimize = nIter, verbose = TRUE)
831
-  SingleCellExperiment::reducedDim(inSCE, reducedDimName) <-
832
-    SingleCellExperiment::reducedDim(tmpSCE, 'zinbwave')
802
+                               maxiter.optimize = nIter, verbose = TRUE,
803
+                              BPPARAM = BPPARAM)
804
+  reddimName <- reducedDimNames(inSCE)
805
+  reddimName[reddimName == "zinbwave"] <- reducedDimName
806
+  #SingleCellExperiment::reducedDim(inSCE, reducedDimName) <-
807
+  #  SingleCellExperiment::reducedDim(inSCE, 'zinbwave')
808
+  S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <-
809
+    list(useAssay = useAssay, origLogged = FALSE, method = "ZINBWaVE",
810
+         matType = "reducedDim", batch = batch)
833 811
   return(inSCE)
834 812
 }
... ...
@@ -38,24 +38,17 @@
38 38
                            classGroup2 = NULL, groupName1, groupName2,
39 39
                            analysisName, covariates = NULL, overwrite = FALSE){
40 40
     # Input checks
41
-    if(!inherits(inSCE, "SingleCellExperiment")){
42
-        stop('"inSCE" should be a SingleCellExperiment inherited Object.')
43
-    }
44
-    if (is.null(useAssay) & is.null(useReducedDim)) {
45
-      stop("`useAssay` or `useReducedDim` must be specified.")
46
-    } else {
47
-      if (!is.null(useReducedDim)) {
48
-        if(!useReducedDim %in% SingleCellExperiment::reducedDimNames(inSCE)){
49
-          stop(paste('"useReducedDim" name: ', useReducedDim, ' not found.'))
50
-        }
51
-        useAssay <- NULL
52
-      } else {
53
-        if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)){
54
-          stop(paste('"useAssay" name: ', useAssay, ' not found.'))
41
+    useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay,
42
+                               useReducedDim = useReducedDim,
43
+                               returnMatrix = FALSE)
44
+    if ("diffExp" %in% names(S4Vectors::metadata(inSCE))){
45
+      if(analysisName %in% names(S4Vectors::metadata(inSCE)$diffExp)){
46
+        if(!isTRUE(overwrite)){
47
+          stop("analysisName '", analysisName, "' already exists. ",
48
+               "Set `overwrite` to `TRUE` to overwrite.")
55 49
         }
56 50
       }
57 51
     }
58
-
59 52
     if(is.null(index1) && (is.null(classGroup1) || is.null(class))){
60 53
         stop('At least "index1" or "classGroup1" should be specified.')
61 54
     } else if(!is.null(index1) && (!is.null(classGroup1) || !is.null(class))){
... ...
@@ -66,14 +59,7 @@
66 59
        !all(covariates %in% names(SummarizedExperiment::colData(inSCE)))){
67 60
         stop("Not all specified covariates exist.")
68 61
     }
69
-    if ("diffExp" %in% names(S4Vectors::metadata(inSCE))){
70
-        if(analysisName %in% names(S4Vectors::metadata(inSCE)$diffExp)){
71
-            if(!isTRUE(overwrite)){
72
-                stop("analysisName '", analysisName, "' already exists. ",
73
-                     "Set `overwrite` to `TRUE` to overwrite.")
74
-            }
75
-        }
76
-    }
62
+
77 63
     groupNames <- c(groupName1, groupName2)
78 64
     annotation <- c(class, covariates)
79 65
     if(!is.null(index1)){
... ...
@@ -84,18 +70,7 @@
84 70
             cells2 <- sort(setdiff(colnames(inSCE), cells1))
85 71
         }
86 72
     } else {
87
-        if(length(class) == 1 && inherits(class, "character")){
88
-            if(!class %in% names(SummarizedExperiment::colData(inSCE))){
89
-                stop("class: '", class, "' not found.")
90
-            }
91
-            class <- SummarizedExperiment::colData(inSCE)[[class]]
92
-        } else {
93
-            if(!length(class) == ncol(inSCE)){
94
-                stop("Length of given `class` vector should equal to ",
95
-                     "`ncol(inSCE)`; Or specify a column name of ",
96
-                     "`colData(inSCE)`")
97
-            }
98
-        }
73
+        class <- .manageCellVar(inSCE, var = class)
99 74
         uniqCats <- unique(as.vector(class))
100 75
         index1 <- class %in% classGroup1
101 76
         if(is.null(classGroup2)){
... ...
@@ -129,8 +104,8 @@
129 104
         }
130 105
     }
131 106
     return(
132
-        list(useAssay = useAssay,
133
-             useReducedDim = useReducedDim,
107
+        list(useAssay = useMat$names$useAssay,
108
+             useReducedDim = useMat$names$useReducedDim,
134 109
              groupNames = groupNames,
135 110
              select = select,
136 111
              annotation = annotation)
... ...
@@ -702,7 +677,7 @@ runWilcox <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL,
702 677
   if (!is.null(useAssay)) {
703 678
     mat <- expData(inSCE, useAssay)
704 679
   } else {
705
-    mat <- t(expData(inSCE, useReducedDim))
680
+    mat <- t(SingleCellExperiment::reducedDim(inSCE, useReducedDim))
706 681
   }
707 682
   if (isTRUE(verbose)) {
708 683
     message(date(), " ... Running DE with wilcox, Analysis name: ",
... ...
@@ -1,65 +1,3 @@
1
-.matrixTypeCheck <- function(inSCE, redDimType = NULL,
2
-                             useAssay = NULL, useReducedDim = NULL,
3
-                             useAltExp = NULL) {
4
-  # Helper function for checking if the specified matrix type is valid
5
-  if (!inherits(inSCE, "SingleCellExperiment")){
6
-    stop("Please use a SingleCellExperiment object")
7
-  }
8
-  if (redDimType == "embedding") {
9
-    if (is.null(useAssay) && is.null(useReducedDim)) {
10
-      stop("`useAssay` and `useReducedDim` cannot be NULL at the same time.")
11
-    } else if (!is.null(useAssay) && !is.null(useReducedDim)) {
12
-      stop("`useAssay` and `useReducedDim` cannot be specified at the same time.")
13
-    } else {
14
-      if (!is.null(useReducedDim)) {
15
-        if (!useReducedDim %in% SingleCellExperiment::reducedDimNames(inSCE)) {
16
-          stop("Specified `useReducedDim` not found.")
17
-        }
18
-        if (!is.null(useAltExp)) {
19
-          warning("`useAltExp` will be ignored when using `useReducedDim`.")
20
-        }
21
-        sce <- inSCE
22
-      } else {
23
-        if (!is.null(useAltExp)) {
24
-          if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) {
25
-            stop("Specified `useAltExp` not found.")
26
-          }
27
-          sce <- SingleCellExperiment::altExp(inSCE, useAltExp)
28
-          if (!useAssay %in% SummarizedExperiment::assayNames(sce)) {
29
-            stop("Specified `useAssay` not found in `useAltExp`.")
30
-          }
31
-        } else {
32
-          if (!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
33
-            stop("Specified `useAssay` not found.")
34
-          }
35
-          sce <- inSCE
36
-        }
37
-      }
38
-    }
39
-  } else if (redDimType == "linear") {
40
-    if (!is.null(useReducedDim)) {
41
-      stop("Currently `useReducedDim` is not allowed for linear dimension reduction.")
42
-    }
43
-    if (is.null(useAssay)) {
44
-      stop("`useAssay` cannot be NULL for linear dimension reduction")
45
-    } else {
46
-      if (is.null(useAltExp)) {
47
-        if (!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
48
-          stop("Specified `useAssay` not found.")
49
-        }
50
-      } else {
51
-        if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) {
52
-          stop("Specified `useAltExp` not found.")
53
-        }
54
-        sce <- SingleCellExperiment::altExp(inSCE, useAltExp)
55
-        if (!useAssay %in% SummarizedExperiment::assayNames(sce)) {
56
-          stop("Specified `useAssay` not found in `useAltExp`.")
57
-        }
58
-      }
59
-    }
60
-  }
61
-}
62
-
63 1
 #' Generic Wrapper function for running dimensionality reduction
64 2
 #' @details Wrapper function to run one of the available dimensionality
65 3
 #' reduction algorithms integrated within SCTK from \code{\link{scaterPCA}},
... ...
@@ -81,9 +19,9 @@
81 19
 #' @param useReducedDim The low dimension representation to use for embedding
82 20
 #' computation. Default \code{NULL}.
83 21
 #' @param reducedDimName The name of the result matrix. Required.
84
-#' @param useFeatureSubset Subset of feature to use for dimension reduction. A 
22
+#' @param useFeatureSubset Subset of feature to use for dimension reduction. A
85 23
 #' character string indicating a \code{rowData} variable that stores the logical
86
-#' vector of HVG selection, or a vector that can subset the rows of 
24
+#' vector of HVG selection, or a vector that can subset the rows of
87 25
 #' \code{inSCE}. Default \code{NULL}.
88 26
 #' @param scale Logical scalar, whether to standardize the expression values.
89 27
 #' Default \code{TRUE}.
... ...
@@ -115,39 +53,43 @@ runDimReduce <- function(inSCE,
115 53
                                     "scaterUMAP",
116 54
                                     "seuratUMAP"),
117 55
                          useAssay = NULL, useReducedDim = NULL,
118
-                         useAltExp = NULL, reducedDimName = method, 
119
-                         nComponents = 20, useFeatureSubset = NULL, 
120
-                         scale = FALSE, seed = NULL, ...) 
56
+                         useAltExp = NULL, reducedDimName = method,
57
+                         nComponents = 20, useFeatureSubset = NULL,
58
+                         scale = FALSE, seed = NULL, ...)
121 59
 {
122 60
 
123 61
   method <- match.arg(method)
124 62
   args <- list(...)
125
-  
126
-  if (method %in% c("scaterPCA", "seuratPCA", "seuratICA")) {
127
-    .matrixTypeCheck(inSCE, "linear", useAssay, useReducedDim, useAltExp)
128
-  } else {
129
-    .matrixTypeCheck(inSCE, "embedding", useAssay, useReducedDim, useAltExp)
63
+  if (method %in% c("scaterPCA", "seuratPCA", "seuratICA") &
64
+      !is.null(useReducedDim)) {
65
+    stop("`useReducedDim` is not allowed for linear dimension reduction.")
130 66
   }
131 67
 
132 68
   if (method == "scaterPCA") {
133
-    inSCE <- scaterPCA(inSCE = inSCE, useAssay = useAssay, 
134
-                       useAltExp = useAltExp, reducedDimName = reducedDimName, 
135
-                       nComponents = nComponents, 
136
-                       useFeatureSubset = useFeatureSubset, scale = scale, 
69
+    inSCE <- scaterPCA(inSCE = inSCE, useAssay = useAssay,
70
+                       useAltExp = useAltExp, reducedDimName = reducedDimName,
71
+                       nComponents = nComponents,
72
+                       useFeatureSubset = useFeatureSubset, scale = scale,
137 73
                        seed = seed, ...)
138 74
   } else if (method == "scaterUMAP") {
139 75
     inSCE <- getUMAP(inSCE = inSCE, useAssay = useAssay, useAltExp = useAltExp,
140
-                     useReducedDim = useReducedDim, 
141
-                     useFeatureSubset = useFeatureSubset, scale = scale, 
76
+                     useReducedDim = useReducedDim,
77
+                     useFeatureSubset = useFeatureSubset, scale = scale,
142 78
                      reducedDimName = reducedDimName, seed = seed, ...)
143 79
   } else if (method == "rTSNE") {
144 80
     inSCE <- getTSNE(inSCE = inSCE, useAssay = useAssay, useAltExp = useAltExp,
145
-                     useReducedDim = useReducedDim, 
146
-                     useFeatureSubset = useFeatureSubset, scale = scale, 
81
+                     useReducedDim = useReducedDim,
82
+                     useFeatureSubset = useFeatureSubset, scale = scale,
147 83
                      reducedDimName = reducedDimName, seed = seed, ...)
148 84
   } else {
149 85
     # Seurat part
150
-    
86
+    # TODO: Honestly, the input checks should have been implemented for
87
+    # functions being wrapped because they are being exposed to users as well.
88
+    # We should not being performing redundant checks when wrapping them again.
89
+    useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay,
90
+                               useReducedDim = useReducedDim,
91
+                               useAltExp = useAltExp, returnMatrix = FALSE)
92
+    useAssay <- useMat$names$useAssay
151 93
     if (!is.null(useAltExp)) {
152 94
       tempSCE <- SingleCellExperiment::altExp(inSCE, useAltExp)
153 95
     } else if (!is.null(useAssay)) {
... ...
@@ -157,17 +99,17 @@ runDimReduce <- function(inSCE,
157 99
       ## SeuratPCA/ICA
158 100
       if (method == "seuratPCA") {
159 101
         message(paste0(date(), " ... Computing Seurat PCA."))
160
-        tempSCE <- runSeuratPCA(tempSCE, useAssay = useAssay, 
102
+        tempSCE <- runSeuratPCA(tempSCE, useAssay = useAssay,
161 103
                                 reducedDimName = reducedDimName,
162
-                                nPCs = nComponents, 
104
+                                nPCs = nComponents,
163 105
                                 useFeatureSubset = useFeatureSubset,
164 106
                                 scale = scale, seed = seed, ...)
165 107
       } else if (method == "seuratICA") {
166 108
         message(paste0(date(), " ... Computing Seurat ICA."))
167 109
         tempSCE <- runSeuratICA(tempSCE, useAssay = useAssay,
168 110
                                 reducedDimName = reducedDimName,
169
-                                nics = nComponents, 
170
-                                useFeatureSubset = useFeatureSubset, 
111
+                                nics = nComponents,
112
+                                useFeatureSubset = useFeatureSubset,
171 113
                                 scale = scale, seed = seed, ...)
172 114
       }
173 115
       seuratObj <- tempSCE@metadata$seurat
... ...
@@ -187,24 +129,24 @@ runDimReduce <- function(inSCE,
187 129
           message(paste0(date(), " ... Computing Seurat PCA."))
188 130
           tempSCE <- runSeuratPCA(inSCE = tempSCE,
189 131
                                useAssay = useAssay,
190
-                               reducedDimName = paste0(useAssay, "_seuratPCA"), 
132
+                               reducedDimName = paste0(useAssay, "_seuratPCA"),
191 133
                                useFeatureSubset = useFeatureSubset, seed = seed)
192 134
         } else if (args$useReduction == "ica") {
193 135
           message(paste0(date(), " ... Computing Seurat ICA."))
194 136
           tempSCE <- runSeuratICA(inSCE = tempSCE,
195 137
                                useAssay = useAssay,
196
-                               reducedDimName = paste0(useAssay, "_seuratICA"), 
138
+                               reducedDimName = paste0(useAssay, "_seuratICA"),
197 139
                                useFeatureSubset = useFeatureSubset, seed = seed)
198 140
         }
199 141
         if (method == "seuratUMAP") {
200 142
           message(paste0(date(), " ... Computing Seurat UMAP."))
201
-          tempSCE <- runSeuratUMAP(inSCE = tempSCE, 
202
-                                   reducedDimName = reducedDimName, 
143
+          tempSCE <- runSeuratUMAP(inSCE = tempSCE,
144
+                                   reducedDimName = reducedDimName,
203 145
                                    seed = seed, ...)
204 146
         } else {
205 147
           message(paste0(date(), " ... Computing Seurat tSNE."))
206 148
           tempSCE <- runSeuratTSNE(inSCE = tempSCE,
207
-                                   reducedDimName = reducedDimName, 
149
+                                   reducedDimName = reducedDimName,
208 150
                                    seed = seed, ...)
209 151
         }
210 152
       } else {
... ...
@@ -227,13 +169,13 @@ runDimReduce <- function(inSCE,
227 169
           # hard-code useReduction="pca"
228 170
           message(paste0(date(), " ... Computing Seurat UMAP."))
229 171
           tempSCE <- runSeuratUMAP(inSCE = tempSCE, useReduction = "pca",
230
-                                   reducedDimName = reducedDimName, 
172
+                                   reducedDimName = reducedDimName,
231 173
                                    seed = seed, ...)
232 174
         } else {
233 175
           # hard-code useReduction="pca"
234 176
           message(paste0(date(), " ... Computing Seurat tSNE."))
235 177
           tempSCE <- runSeuratTSNE(inSCE = tempSCE, useReduction = "pca",
236
-                                   reducedDimName = reducedDimName, 
178
+                                   reducedDimName = reducedDimName,
237 179
                                    seed = seed, ...)
238 180
         }
239 181
       }
... ...
@@ -118,11 +118,7 @@ runSingleR <- function(inSCE,
118 118
                     "Using its default labeling.")
119 119
         }
120 120
     }
121
-    if (is.null(labelByCluster)) {
122
-        clusters <- NULL
123
-    } else {
124
-        clusters <- inSCE[[labelByCluster]]
125
-    }
121
+    clusters <- .manageCellVar(inSCE, var = labelByCluster)
126 122
     # predictions <- SingleR::SingleR(test = inSCE, assay.type.test = useAssay,
127 123
     #                                 ref = ref, clusters = clusters,
128 124
     #