... | ... |
@@ -75,35 +75,21 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
75 | 75 |
nIterations = 1000, numThreads = 1, seed = NULL){ |
76 | 76 |
params <- as.list(environment()) |
77 | 77 |
params$inSCE <- NULL |
78 |
- # input class check |
|
79 |
- if (!inherits(inSCE, "SingleCellExperiment")){ |
|
80 |
- stop("Please use a SingleCellExperiment object") |
|
81 |
- } |
|
82 |
- # matrix specification check |
|
83 |
- # When useReducedDim, never useAssay; |
|
84 |
- # when useAltExp, useAssay/reducedDim from there |
|
85 |
- if (is.null(useAssay) && is.null(useReducedDim)) { |
|
86 |
- stop("`useAssay` and `useReducedDim` cannot be NULL at the same time.") |
|
87 |
- } |
|
78 |
+ # Note: useMat = list(useAssay = useAssay, ...) |
|
79 |
+ # `.selectSCEMatrix()` does the check and corrects the conditions |
|
80 |
+ # When using useReducedDim, useAssay will be ignored |
|
81 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
82 |
+ useReducedDim = useReducedDim, |
|
83 |
+ useAltExp = useAltExp, |
|
84 |
+ returnMatrix = FALSE)$names |
|
85 |
+ params$useAssay <- useMat$useAssay |
|
86 |
+ useAssay <- useMat$useAssay |
|
87 |
+ |
|
88 | 88 |
if (!is.null(useAltExp)) { |
89 |
- if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) { |
|
90 |
- stop("Specified `useAltExp` not found.") |
|
91 |
- } |
|
92 | 89 |
sce <- SingleCellExperiment::altExp(inSCE, useAltExp) |
93 | 90 |
} else { |
94 | 91 |
sce <- inSCE |
95 | 92 |
} |
96 |
- if (!is.null(useReducedDim)) { |
|
97 |
- useAssay <- NULL |
|
98 |
- if (!useReducedDim %in% SingleCellExperiment::reducedDimNames(sce)) { |
|
99 |
- stop("Specified `useReducedDim` not found.") |
|
100 |
- } |
|
101 |
- } else { |
|
102 |
- # In this case, there is definitely `useAssay` specified |
|
103 |
- if (!useAssay %in% SummarizedExperiment::assayNames(sce)) { |
|
104 |
- stop("Specified `useAssay` not found.") |
|
105 |
- } |
|
106 |
- } |
|
107 | 93 |
|
108 | 94 |
if (!is.null(useAssay)) { |
109 | 95 |
if (isTRUE(logNorm)) { |
... | ... |
@@ -119,9 +105,9 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
119 | 105 |
if (!is.null(nTop) && nTop < nrow(sce)) { |
120 | 106 |
suppressMessages({ |
121 | 107 |
sce <- runFeatureSelection(sce, useAssay, method = "modelGeneVar") |
108 |
+ sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = nTop, |
|
109 |
+ featureSubsetName = "tsneHVG") |
|
122 | 110 |
}) |
123 |
- sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = nTop, |
|
124 |
- featureSubsetName = "tsneHVG") |
|
125 | 111 |
sce <- subsetSCERows(sce, rowData = "tsneHVG", returnAsAltExp = FALSE) |
126 | 112 |
} |
127 | 113 |
} |
... | ... |
@@ -1,39 +1,39 @@ |
1 | 1 |
#' Run UMAP embedding with scater method |
2 |
-#' @description Uniform Manifold Approximation and Projection (UMAP) algorithm |
|
3 |
-#' is commonly for 2D visualization of single-cell data. This function wraps the |
|
4 |
-#' scater \code{\link[scater]{calculateUMAP}} function. |
|
5 |
-#' |
|
2 |
+#' @description Uniform Manifold Approximation and Projection (UMAP) algorithm |
|
3 |
+#' is commonly for 2D visualization of single-cell data. This function wraps the |
|
4 |
+#' scater \code{\link[scater]{calculateUMAP}} function. |
|
5 |
+#' |
|
6 | 6 |
#' With this funciton, users can create UMAP embedding directly from raw count |
7 |
-#' matrix, with necessary preprocessing including normalization, scaling, |
|
8 |
-#' dimension reduction all automated. Yet we still recommend having the PCA as |
|
9 |
-#' input, so that the result can match with the clustering based on the same |
|
7 |
+#' matrix, with necessary preprocessing including normalization, scaling, |
|
8 |
+#' dimension reduction all automated. Yet we still recommend having the PCA as |
|
9 |
+#' input, so that the result can match with the clustering based on the same |
|
10 | 10 |
#' input PCA. |
11 | 11 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
12 | 12 |
#' @param useAssay Assay to use for UMAP computation. If \code{useAltExp} is |
13 | 13 |
#' specified, \code{useAssay} has to exist in |
14 |
-#' \code{assays(altExp(inSCE, useAltExp))}. Ignored when using |
|
14 |
+#' \code{assays(altExp(inSCE, useAltExp))}. Ignored when using |
|
15 | 15 |
#' \code{useReducedDim}. Default \code{"logcounts"}. |
16 | 16 |
#' @param useReducedDim The low dimension representation to use for UMAP |
17 |
-#' computation. If \code{useAltExp} is specified, \code{useReducedDim} has to |
|
17 |
+#' computation. If \code{useAltExp} is specified, \code{useReducedDim} has to |
|
18 | 18 |
#' exist in \code{reducedDims(altExp(inSCE, useAltExp))}. Default \code{NULL}. |
19 | 19 |
#' @param useAltExp The subset to use for UMAP computation, usually for the |
20 | 20 |
#' selected variable features. Default \code{NULL}. |
21 | 21 |
#' @param sample Character vector. Indicates which sample each cell belongs to. |
22 |
-#' If given a single character, will take the annotation from \code{colData}. |
|
22 |
+#' If given a single character, will take the annotation from \code{colData}. |
|
23 | 23 |
#' Default \code{NULL}. |
24 | 24 |
#' @param reducedDimName A name to store the results of the UMAP embedding |
25 | 25 |
#' coordinates obtained from this method. Default \code{"UMAP"}. |
26 | 26 |
#' @param logNorm Whether the counts will need to be log-normalized prior to |
27 | 27 |
#' generating the UMAP via \code{\link{scaterlogNormCounts}}. Ignored when using |
28 | 28 |
#' \code{useReducedDim}. Default \code{FALSE}. |
29 |
-#' @param useFeatureSubset Subset of feature to use for dimension reduction. A |
|
29 |
+#' @param useFeatureSubset Subset of feature to use for dimension reduction. A |
|
30 | 30 |
#' character string indicating a \code{rowData} variable that stores the logical |
31 |
-#' vector of HVG selection, or a vector that can subset the rows of |
|
31 |
+#' vector of HVG selection, or a vector that can subset the rows of |
|
32 | 32 |
#' \code{inSCE}. Default \code{NULL}. |
33 | 33 |
#' @param nTop Automatically detect this number of variable features to use for |
34 |
-#' dimension reduction. Ignored when using \code{useReducedDim} or using |
|
34 |
+#' dimension reduction. Ignored when using \code{useReducedDim} or using |
|
35 | 35 |
#' \code{useFeatureSubset}. Default \code{2000}. |
36 |
-#' @param scale Whether \code{useAssay} matrix will need to be standardized. |
|
36 |
+#' @param scale Whether \code{useAssay} matrix will need to be standardized. |
|
37 | 37 |
#' Default \code{TRUE}. |
38 | 38 |
#' @param pca Logical. Whether to perform dimension reduction with PCA before |
39 | 39 |
#' UMAP. Ignored when using \code{useReducedDim}. Default \code{TRUE}. |
... | ... |
@@ -53,12 +53,12 @@ |
53 | 53 |
#' result on a more even dispersal of points. Default \code{0.01}. See |
54 | 54 |
#' \code{\link[scater]{calculateUMAP}} for more information. |
55 | 55 |
#' @param spread The effective scale of embedded points. In combination with |
56 |
-#' \code{minDist}, this determines how clustered/clumped the embedded points |
|
57 |
-#' are. Default \code{1}. See \code{\link[scater]{calculateUMAP}} for more |
|
56 |
+#' \code{minDist}, this determines how clustered/clumped the embedded points |
|
57 |
+#' are. Default \code{1}. See \code{\link[scater]{calculateUMAP}} for more |
|
58 | 58 |
#' information. |
59 |
-#' @param seed Random seed for reproducibility of UMAP results. |
|
59 |
+#' @param seed Random seed for reproducibility of UMAP results. |
|
60 | 60 |
#' Default \code{NULL} will use global seed in use by the R environment. |
61 |
-#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether |
|
61 |
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether |
|
62 | 62 |
#' the PCA should be parallelized. |
63 | 63 |
#' @return A \linkS4class{SingleCellExperiment} object with UMAP computation |
64 | 64 |
#' updated in \code{reducedDim(inSCE, reducedDimName)}. |
... | ... |
@@ -73,67 +73,35 @@ |
73 | 73 |
#' # Run from PCA |
74 | 74 |
#' sce <- scaterlogNormCounts(sce, "logcounts") |
75 | 75 |
#' sce <- runModelGeneVar(sce) |
76 |
-#' sce <- scaterPCA(sce, useAssay = "logcounts", |
|
76 |
+#' sce <- scaterPCA(sce, useAssay = "logcounts", |
|
77 | 77 |
#' useFeatureSubset = "HVG_modelGeneVar2000", scale = TRUE) |
78 | 78 |
#' sce <- getUMAP(sce, useReducedDim = "PCA") |
79 | 79 |
#' } |
80 | 80 |
#' @importFrom S4Vectors metadata<- |
81 |
-getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
|
82 |
- useAltExp = NULL, sample = NULL, reducedDimName = "UMAP", |
|
83 |
- logNorm = FALSE, useFeatureSubset = NULL, nTop = 2000, |
|
84 |
- scale = TRUE, pca = TRUE, initialDims = 25, nNeighbors = 30, |
|
85 |
- nIterations = 200, alpha = 1, minDist = 0.01, spread = 1, |
|
81 |
+getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
|
82 |
+ useAltExp = NULL, sample = NULL, reducedDimName = "UMAP", |
|
83 |
+ logNorm = FALSE, useFeatureSubset = NULL, nTop = 2000, |
|
84 |
+ scale = TRUE, pca = TRUE, initialDims = 25, nNeighbors = 30, |
|
85 |
+ nIterations = 200, alpha = 1, minDist = 0.01, spread = 1, |
|
86 | 86 |
seed = NULL, BPPARAM = BiocParallel::SerialParam()) { |
87 | 87 |
params <- as.list(environment()) |
88 | 88 |
params$inSCE <- NULL |
89 | 89 |
params$BPPARAM <- NULL |
90 |
- # input class check |
|
91 |
- if (!inherits(inSCE, "SingleCellExperiment")){ |
|
92 |
- stop("Please use a SingleCellExperiment object") |
|
93 |
- } |
|
94 |
- # matrix specification check |
|
95 |
- # When useReducedDim, never useAssay; |
|
96 |
- # when useAltExp, useAssay/reducedDim from there |
|
97 |
- if (is.null(useAssay) && is.null(useReducedDim)) { |
|
98 |
- stop("`useAssay` and `useReducedDim` cannot be NULL at the same time.") |
|
99 |
- } |
|
100 |
- if (!is.null(useAltExp)) { |
|
101 |
- if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) { |
|
102 |
- stop("Specified `useAltExp` not found.") |
|
103 |
- } |
|
104 |
- sce <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
105 |
- } else { |
|
106 |
- sce <- inSCE |
|
107 |
- } |
|
108 |
- if (!is.null(useReducedDim)) { |
|
109 |
- useAssay <- NULL |
|
110 |
- if (!useReducedDim %in% SingleCellExperiment::reducedDimNames(sce)) { |
|
111 |
- stop("Specified `useReducedDim` not found.") |
|
112 |
- } |
|
113 |
- } else { |
|
114 |
- # In this case, there is definitely `useAssay` specified |
|
115 |
- if (!useAssay %in% SummarizedExperiment::assayNames(sce)) { |
|
116 |
- stop("Specified `useAssay` not found.") |
|
117 |
- } |
|
118 |
- } |
|
119 |
- # Check sample variable |
|
120 |
- if(!is.null(sample)) { |
|
121 |
- if (is.character(sample) && length(sample) == 1) { |
|
122 |
- if (!sample %in% names(SummarizedExperiment::colData(inSCE))) { |
|
123 |
- stop("Given sample annotation '", sample, "' not found.") |
|
124 |
- } |
|
125 |
- sample <- SummarizedExperiment::colData(inSCE)[[sample]] |
|
126 |
- } else if (length(sample) > 1) { |
|
127 |
- if(length(sample) != ncol(inSCE)) { |
|
128 |
- stop("'sample' must be the same length as the number of columns in ", |
|
129 |
- "'inSCE'") |
|
130 |
- } |
|
131 |
- } |
|
132 |
- } else { |
|
133 |
- sample = rep(1, ncol(inSCE)) |
|
134 |
- } |
|
90 |
+ # Note: useMat = list(useAssay = useAssay, ...) |
|
91 |
+ # `.selectSCEMatrix()` does the check and corrects the conditions |
|
92 |
+ # When using useReducedDim, useAssay will be ignored |
|
93 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
94 |
+ useReducedDim = useReducedDim, |
|
95 |
+ useAltExp = useAltExp, |
|
96 |
+ returnMatrix = FALSE)$names |
|
97 |
+ params$useAssay <- useMat$useAssay |
|
98 |
+ useAssay <- useMat$useAssay |
|
99 |
+ if (!is.null(useAltExp)) sce <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
100 |
+ else sce <- inSCE |
|
101 |
+ sample <- .manageCellVar(inSCE, sample) |
|
102 |
+ if (is.null(sample)) sample <- rep(1, ncol(inSCE)) |
|
135 | 103 |
samples <- unique(sample) |
136 |
- umapDims = matrix(nrow = ncol(inSCE), ncol = 2) |
|
104 |
+ umapDims <- matrix(nrow = ncol(inSCE), ncol = 2) |
|
137 | 105 |
# Start looping for each sample |
138 | 106 |
for (i in seq_along(samples)){ |
139 | 107 |
sceSampleInd <- sample == samples[i] |
... | ... |
@@ -146,13 +114,13 @@ getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
146 | 114 |
useAssayTemp = "ScaterLogNormCounts" |
147 | 115 |
} |
148 | 116 |
} |
149 |
- subset_row <- .parseUseFeatureSubset(inSCE, useFeatureSubset, |
|
117 |
+ subset_row <- .parseUseFeatureSubset(inSCE, useFeatureSubset, |
|
150 | 118 |
altExpObj = sce, returnType = "logic") |
151 |
- # initialDims passed to `pca` of scran::calculateUMAP. |
|
119 |
+ # initialDims passed to `pca` of scran::calculateUMAP. |
|
152 | 120 |
if (!isTRUE(pca) & !is.null(useAssay)) { |
153 | 121 |
initialDims <- NULL |
154 | 122 |
} |
155 |
- |
|
123 |
+ |
|
156 | 124 |
nNeighbors <- min(ncol(sceSample), nNeighbors) |
157 | 125 |
message(paste0(date(), " ... Computing Scater UMAP for sample '", |
158 | 126 |
samples[i], "'.")) |
... | ... |
@@ -163,8 +131,8 @@ getUMAP <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
163 | 131 |
learning_rate = alpha, |
164 | 132 |
min_dist = minDist, spread = spread, |
165 | 133 |
n_sgd_threads = 1, pca = initialDims, |
166 |
- n_epochs = nIterations, |
|
167 |
- subset_row = subset_row, ntop = nTop, |
|
134 |
+ n_epochs = nIterations, |
|
135 |
+ subset_row = subset_row, ntop = nTop, |
|
168 | 136 |
BPPARAM = BPPARAM) |
169 | 137 |
}) |
170 | 138 |
if (is.null(rownames(sceSample))) { |
... | ... |
@@ -55,9 +55,8 @@ |
55 | 55 |
"matType. Force using user specification.") |
56 | 56 |
} |
57 | 57 |
matType <- ifelse(is.null(matType), bcInfo$matType, matType) |
58 |
- batch <- ifelse(is.null(batch), bcInfo$batch, batch) |
|
58 |
+ if (is.null(batch)) batch <- bcInfo$batch |
|
59 | 59 |
if (is.null(condition)) condition <- bcInfo$condition |
60 |
- #condition <- ifelse(is.null(condition), bcInfo$condition, condition) |
|
61 | 60 |
} |
62 | 61 |
} |
63 | 62 |
return(list(origAssay = origAssay, |
... | ... |
@@ -94,7 +93,7 @@ |
94 | 93 |
#' @return An object of class \code{"gtable"}, combining four \code{ggplot}s. |
95 | 94 |
#' @examples |
96 | 95 |
#' data("sceBatches") |
97 |
-#' sceBatches <- scaterlogNormCounts(sceBatches, "logcounts") |
|
96 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
98 | 97 |
#' sceBatches <- runLimmaBC(sceBatches) |
99 | 98 |
#' plotBatchCorrCompare(sceBatches, "LIMMA", condition = "cell_type") |
100 | 99 |
#' @export |
... | ... |
@@ -232,59 +231,33 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
232 | 231 |
#' condition = "cell_type") |
233 | 232 |
plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
234 | 233 |
useAltExp = NULL, batch = 'batch', |
235 |
- condition=NULL, title = NULL) { |
|
236 |
- if(!inherits(inSCE, 'SingleCellExperiment')){ |
|
237 |
- stop("'inSCE' must inherit from 'SingleCellExperiment'.") |
|
238 |
- } |
|
239 |
- if(is.null(useAssay) + is.null(useReddim) + is.null(useAltExp) != 2){ |
|
240 |
- stop("One and only one of `useAssay`, `useReddim`, ", |
|
241 |
- "`usAltExp` has to be specified.") |
|
242 |
- } |
|
243 |
- if(!is.null(useAssay)){ |
|
244 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)){ |
|
245 |
- stop("'useAssay' not found in 'inSCE'.") |
|
246 |
- } |
|
247 |
- mat <- SummarizedExperiment::assay(inSCE, useAssay) |
|
248 |
- } |
|
249 |
- if(!is.null(useReddim)){ |
|
250 |
- if(!useReddim %in% SingleCellExperiment::reducedDimNames(inSCE)){ |
|
251 |
- stop("'useReddim not found in 'inSCE'.") |
|
252 |
- } |
|
253 |
- mat <- t(SingleCellExperiment::reducedDim(inSCE, useReddim)) |
|
254 |
- } |
|
255 |
- if(!is.null(useAltExp)){ |
|
256 |
- if(!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)){ |
|
257 |
- stop("'useAltExp not found in 'inSCE'.") |
|
258 |
- } |
|
259 |
- ae <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
260 |
- mat <- SummarizedExperiment::assay(ae) |
|
261 |
- } |
|
234 |
+ condition = NULL, title = NULL) { |
|
235 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
236 |
+ useReducedDim = useReddim, useAltExp = useAltExp, |
|
237 |
+ returnMatrix = TRUE, cellAsCol = TRUE) |
|
238 |
+ mat <- useMat$mat |
|
262 | 239 |
if(is.null(batch)){ |
263 | 240 |
stop("Batch annotation has to be given.") |
264 |
- } else{ |
|
265 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
266 |
- stop("'batch' not found in 'inSCE'.") |
|
267 |
- } |
|
268 | 241 |
} |
242 |
+ batchCol <- .manageCellVar(inSCE, var = batch, as.factor = TRUE) |
|
269 | 243 |
if(!inherits(mat, 'matrix')){ |
270 | 244 |
mat <- as.matrix(mat) |
271 | 245 |
} |
272 |
- batchCol <- SummarizedExperiment::colData(inSCE)[, batch] |
|
273 |
- nlb <- nlevels(as.factor(batchCol)) |
|
246 |
+ nlb <- nlevels(batchCol) |
|
274 | 247 |
if (nlb <= 1){ |
275 | 248 |
stop("No more than one batch found in specified annotation") |
276 | 249 |
} else { |
277 |
- batchMod <- stats::model.matrix(~as.factor(batchCol)) |
|
250 |
+ batchMod <- stats::model.matrix(~batchCol) |
|
278 | 251 |
} |
279 | 252 |
if (is.null(condition)){ |
280 | 253 |
condMod <- matrix(rep(1, ncol(mat)), ncol = 1) |
281 | 254 |
} else { |
282 |
- condCol <- SingleCellExperiment::colData(inSCE)[, condition] |
|
283 |
- nlc <- nlevels(as.factor(condCol)) |
|
255 |
+ condCol <- .manageCellVar(inSCE, var = condition, as.factor = TRUE) |
|
256 |
+ nlc <- nlevels(condCol) |
|
284 | 257 |
if (nlc <= 1){ |
285 | 258 |
condMod <- matrix(rep(1, ncol(mat)), ncol = 1) |
286 | 259 |
} else { |
287 |
- condMod <- stats::model.matrix(~as.factor(condCol)) |
|
260 |
+ condMod <- stats::model.matrix(~condCol) |
|
288 | 261 |
} |
289 | 262 |
} |
290 | 263 |
mod <- cbind(condMod, batchMod[, -1]) |
... | ... |
@@ -3,12 +3,12 @@ |
3 | 3 |
#' BBKNN, an extremely fast graph-based data integration algorithm. It modifies |
4 | 4 |
#' the neighbourhood construction step to produce a graph that is balanced |
5 | 5 |
#' across all batches of the data. |
6 |
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
6 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
7 | 7 |
#' @param useAssay A single character indicating the name of the assay requiring |
8 | 8 |
#' batch correction. Default \code{"logcounts"}. |
9 |
-#' @param batch A single character indicating a field in |
|
10 |
-#' \code{\link{colData}} that annotates the batches. |
|
11 |
-#' Default \code{"batch"}. |
|
9 |
+#' @param batch A single character indicating a field in \code{colData} that |
|
10 |
+#' annotates the batches of each cell; or a vector/factor with the same length |
|
11 |
+#' as the number of cells. Default \code{"batch"}. |
|
12 | 12 |
#' @param reducedDimName A single character. The name for the corrected |
13 | 13 |
#' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}. |
14 | 14 |
#' Default \code{"BBKNN"}. |
... | ... |
@@ -24,17 +24,12 @@ |
24 | 24 |
#' @examples |
25 | 25 |
#' \dontrun{ |
26 | 26 |
#' data('sceBatches', package = 'singleCellTK') |
27 |
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1) |
|
28 |
-#' sceBatches.small <- sample(sceBatches, 20) |
|
29 |
-#' sceCorr <- runBBKNN(sceBatches.small, useAssay = "logcounts", |
|
30 |
-#' nComponents = 10) |
|
27 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
28 |
+#' sceBatches <- runBBKNN(sceBatches, useAssay = "logcounts", |
|
29 |
+#' nComponents = 10) |
|
31 | 30 |
#' } |
32 | 31 |
runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch', |
33 | 32 |
reducedDimName = 'BBKNN', nComponents = 50L){ |
34 |
- ## Input check |
|
35 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
36 |
- stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
37 |
- } |
|
38 | 33 |
if(!reticulate::py_module_available(module = "bbknn")){ |
39 | 34 |
warning("Cannot find python module 'bbknn', please install Conda and", |
40 | 35 |
" run sctkPythonInstallConda() or run ", |
... | ... |
@@ -48,15 +43,16 @@ runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch', |
48 | 43 |
" to select the correct Python environment.") |
49 | 44 |
return(inSCE) |
50 | 45 |
} |
51 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
52 |
- stop(paste("\"batch\" name:", batch, "not found")) |
|
53 |
- } |
|
46 |
+ useAssay <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
47 |
+ returnMatrix = FALSE)$names$useAssay |
|
48 |
+ batchVec <- .manageCellVar(inSCE, batch, as.factor = FALSE) |
|
54 | 49 |
reducedDimName <- gsub(' ', '_', reducedDimName) |
55 | 50 |
nComponents <- as.integer(nComponents) |
56 | 51 |
## Run algorithm |
57 | 52 |
adata <- .sce2adata(inSCE, useAssay = useAssay) |
53 |
+ adata$obs[["batch"]] <- batchVec |
|
58 | 54 |
sc$tl$pca(adata, n_comps = nComponents) |
59 |
- bbknn$bbknn(adata, batch_key = batch, n_pcs = nComponents) |
|
55 |
+ bbknn$bbknn(adata, batch_key = "batch", n_pcs = nComponents) |
|
60 | 56 |
sc$tl$umap(adata, n_components = nComponents) |
61 | 57 |
bbknnUmap <- adata$obsm[["X_umap"]] |
62 | 58 |
SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- bbknnUmap |
... | ... |
@@ -84,7 +80,7 @@ runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch', |
84 | 80 |
#' informative covariates could still be useful. If the cell types are unknown |
85 | 81 |
#' and are expected to be unbalanced, it is recommended to set \code{useSVA} |
86 | 82 |
#' to \code{TRUE}. |
87 |
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
83 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
88 | 84 |
#' @param useAssay A single character indicating the name of the assay requiring |
89 | 85 |
#' batch correction. Default \code{"counts"}. |
90 | 86 |
#' @param batch A single character indicating a field in |
... | ... |
@@ -198,69 +194,76 @@ runComBatSeq <- function(inSCE, useAssay = "counts", batch = 'batch', |
198 | 194 |
#' |
199 | 195 |
#' fastMNN is a variant of the classic MNN method, modified for speed and more |
200 | 196 |
#' robust performance. For introduction of MNN, see \code{\link{runMNNCorrect}}. |
201 |
-#' @param inSCE inherited object. Required. |
|
197 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
202 | 198 |
#' @param useAssay A single character indicating the name of the assay requiring |
203 |
-#' batch correction. Default \code{"logcounts"}. Alternatively, see |
|
204 |
-#' \code{pcInput} parameter. |
|
205 |
-#' @param batch A single character indicating a field in |
|
206 |
-#' \code{\link{colData}} that annotates the batches. |
|
207 |
-#' Default \code{"batch"}. |
|
199 |
+#' batch correction. Default \code{"logcounts"}. |
|
200 |
+#' @param useReducedDim A single character indicating the dimension reduction |
|
201 |
+#' used for batch correction. Will ignore \code{useAssay} when using. |
|
202 |
+#' Default \code{NULL}. |
|
203 |
+#' @param batch A single character indicating a field in \code{colData} that |
|
204 |
+#' annotates the batches of each cell; or a vector/factor with the same length |
|
205 |
+#' as the number of cells. Default \code{"batch"}. |
|
208 | 206 |
#' @param reducedDimName A single character. The name for the corrected |
209 |
-#' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}. |
|
210 |
-#' Default \code{"fastMNN"}. |
|
211 |
-#' @param pcInput A logical scalar. Whether to use a low-dimension matrix for |
|
212 |
-#' batch effect correction. If \code{TRUE}, \code{useAssay} will be searched |
|
213 |
-#' from \code{reducedDimNames(inSCE)}. Default \code{FALSE}. |
|
207 |
+#' low-dimensional representation. Default \code{"fastMNN"}. |
|
208 |
+#' @param k An integer scalar specifying the number of nearest neighbors to |
|
209 |
+#' consider when identifying MNNs. See "See Also". Default \code{20}. |
|
210 |
+#' @param propK A numeric scalar in (0, 1) specifying the proportion of cells in |
|
211 |
+#' each dataset to use for mutual nearest neighbor searching. See "See Also". |
|
212 |
+#' Default \code{NULL}. |
|
213 |
+#' @param ndist A numeric scalar specifying the threshold beyond which |
|
214 |
+#' neighbours are to be ignored when computing correction vectors. See "See |
|
215 |
+#' Also". Default \code{3}. |
|
216 |
+#' @param minBatchSkip Numeric scalar specifying the minimum relative magnitude |
|
217 |
+#' of the batch effect, below which no correction will be performed at a given |
|
218 |
+#' merge step. See "See Also". Default \code{0}. |
|
219 |
+#' @param cosNorm A logical scalar indicating whether cosine normalization |
|
220 |
+#' should be performed on \code{useAssay} prior to PCA. See "See Also". Default |
|
221 |
+#' \code{TRUE}. |
|
222 |
+#' @param nComponents An integer scalar specifying the number of dimensions to |
|
223 |
+#' produce. See "See Also". Default \code{50}. |
|
224 |
+#' @param weights The weighting scheme to use. Passed to |
|
225 |
+#' \code{\link[batchelor]{multiBatchPCA}}. Default \code{NULL}. |
|
226 |
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether |
|
227 |
+#' the SVD should be parallelized. |
|
214 | 228 |
#' @return The input \linkS4class{SingleCellExperiment} object with |
215 | 229 |
#' \code{reducedDim(inSCE, reducedDimName)} updated. |
230 |
+#' @seealso \code{\link[batchelor]{fastMNN}} for using \code{useAssay}, and |
|
231 |
+#' \code{\link[batchelor]{reducedMNN}} for using \code{useReducedDim} |
|
216 | 232 |
#' @export |
217 | 233 |
#' @references Lun ATL, et al., 2016 |
218 | 234 |
#' @examples |
219 | 235 |
#' data('sceBatches', package = 'singleCellTK') |
220 |
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1) |
|
221 |
-#' sceCorr <- runFastMNN(sceBatches, useAssay = 'logcounts', pcInput = FALSE) |
|
222 |
-runFastMNN <- function(inSCE, useAssay = "logcounts", |
|
223 |
- reducedDimName = "fastMNN", batch = 'batch', |
|
224 |
- pcInput = FALSE){ |
|
225 |
- ## Input check |
|
226 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
227 |
- stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
228 |
- } |
|
229 |
- if(isTRUE(pcInput)){ |
|
230 |
- if(!(useAssay %in% SingleCellExperiment::reducedDimNames(inSCE))) { |
|
231 |
- stop(paste("\"useAssay\" (reducedDim) name: ", useAssay, " not found.")) |
|
232 |
- } |
|
233 |
- } else { |
|
234 |
- if(!(useAssay %in% SummarizedExperiment::assayNames(inSCE))) { |
|
235 |
- stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
236 |
- } |
|
237 |
- } |
|
238 |
- |
|
239 |
- if(!(batch %in% names(SummarizedExperiment::colData(inSCE)))){ |
|
240 |
- stop(paste("\"batch name:", batch, "not found.")) |
|
241 |
- } |
|
236 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
237 |
+#' sceCorr <- runFastMNN(sceBatches, useAssay = 'logcounts') |
|
238 |
+runFastMNN <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
|
239 |
+ batch = 'batch', reducedDimName = "fastMNN", k = 20, |
|
240 |
+ propK = NULL, ndist = 3, minBatchSkip = 0, |
|
241 |
+ cosNorm = TRUE, nComponents = 50, weights = NULL, |
|
242 |
+ BPPARAM = BiocParallel::SerialParam()){ |
|
243 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
244 |
+ useReducedDim = useReducedDim, |
|
245 |
+ returnMatrix = TRUE) |
|
246 |
+ mat <- useMat$mat |
|
247 |
+ batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE) |
|
242 | 248 |
reducedDimName <- gsub(' ', '_', reducedDimName) |
243 | 249 |
|
244 |
- ## Run algorithm |
|
245 |
- batches <- list() |
|
246 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
247 |
- batchFactor <- as.factor(batchCol) |
|
248 |
- |
|
249 |
- if(pcInput){ |
|
250 |
- mat <- SingleCellExperiment::reducedDim(inSCE, useAssay) |
|
251 |
- redMNN <- batchelor::reducedMNN(mat, batch = batchFactor) |
|
250 |
+ if (!is.null(useReducedDim)) { |
|
251 |
+ redMNN <- batchelor::reducedMNN(mat, batch = batchVec, BPPARAM = BPPARAM, |
|
252 |
+ k = k, prop.k = propK, ndist = ndist, |
|
253 |
+ min.batch.skip = minBatchSkip) |
|
252 | 254 |
newRedDim <- redMNN$corrected |
253 | 255 |
} else { |
254 |
- mat <- SummarizedExperiment::assay(inSCE, useAssay) |
|
255 |
- mnnSCE <- batchelor::fastMNN(mat, batch = batchFactor) |
|
256 |
+ mnnSCE <- batchelor::fastMNN(mat, batch = batchVec, BPPARAM = BPPARAM, k = k, |
|
257 |
+ prop.k = propK, ndist = ndist, |
|
258 |
+ min.batch.skip = minBatchSkip, |
|
259 |
+ cos.norm = cosNorm, d = nComponents, |
|
260 |
+ weights = weights) |
|
256 | 261 |
newRedDim <- SingleCellExperiment::reducedDim(mnnSCE, 'corrected') |
257 | 262 |
} |
258 | 263 |
SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- newRedDim |
259 |
- S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <- list(useAssay = useAssay, |
|
260 |
- origLogged = TRUE, |
|
261 |
- method = "fastMNN", |
|
262 |
- matType = "reducedDim", |
|
263 |
- batch = batch) |
|
264 |
+ S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <- |
|
265 |
+ list(useAssay = useAssay, origLogged = TRUE, method = "fastMNN", |
|
266 |
+ matType = "reducedDim", batch = batch) |
|
264 | 267 |
return(inSCE) |
265 | 268 |
} |
266 | 269 |
|
... | ... |
@@ -268,15 +271,15 @@ runFastMNN <- function(inSCE, useAssay = "logcounts", |
268 | 271 |
#' @description Harmony is an algorithm that projects cells into a shared |
269 | 272 |
#' embedding in which cells group by cell type rather than dataset-specific |
270 | 273 |
#' conditions. |
271 |
-#' @param inSCE Input \linkS4class{SingleCellExperiment} inherited object. |
|
274 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
272 | 275 |
#' @param useAssay A single character indicating the name of the assay requiring |
273 | 276 |
#' batch correction. Default \code{"logcounts"}. |
274 | 277 |
#' @param useReducedDim A single character indicating the name of the reducedDim |
275 | 278 |
#' used to be corrected. Specifying this will ignore \code{useAssay}. Default |
276 | 279 |
#' \code{NULL}. |
277 |
-#' @param batch A single character indicating a field in |
|
278 |
-#' \code{\link{colData}} that annotates the batches. |
|
279 |
-#' Default \code{"batch"}. |
|
280 |
+#' @param batch A single character indicating a field in \code{colData} that |
|
281 |
+#' annotates the batches of each cell; or a vector/factor with the same length |
|
282 |
+#' as the number of cells. Default \code{"batch"}. |
|
280 | 283 |
#' @param reducedDimName A single character. The name for the corrected |
281 | 284 |
#' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}. |
282 | 285 |
#' Default \code{"HARMONY"}. |
... | ... |
@@ -307,7 +310,7 @@ runFastMNN <- function(inSCE, useAssay = "logcounts", |
307 | 310 |
#' @references Ilya Korsunsky, et al., 2019 |
308 | 311 |
#' @examples |
309 | 312 |
#' data('sceBatches', package = 'singleCellTK') |
310 |
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1) |
|
313 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
311 | 314 |
#' sceCorr <- runHarmony(sceBatches) |
312 | 315 |
runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
313 | 316 |
batch = "batch", reducedDimName = "HARMONY", |
... | ... |
@@ -320,15 +323,15 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
320 | 323 |
call. = FALSE) |
321 | 324 |
} |
322 | 325 |
## Input check |
323 |
- matrixSelect <- .selectSCEMatrix(inSCE, useAssay, useReducedDim, |
|
324 |
- useAltExp = NULL, returnMatrix = TRUE) |
|
325 |
- batch <- .manageCellVar(inSCE, batch, as.factor = TRUE) |
|
326 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay, useReducedDim, |
|
327 |
+ useAltExp = NULL, returnMatrix = TRUE) |
|
328 |
+ batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE) |
|
326 | 329 |
reducedDimName <- gsub(' ', '_', reducedDimName) |
327 | 330 |
## Run algorithm |
328 | 331 |
message(Sys.Date(), " ... Running harmony batch correction") |
329 |
- mat <- matrixSelect$mat |
|
330 |
- do_pca <- ifelse(is.null(matrixSelect$names$useReducedDim), TRUE, FALSE) |
|
331 |
- if (!is.null(matrixSelect$names$useReducedDim)) { |
|
332 |
+ mat <- useMat$mat |
|
333 |
+ do_pca <- ifelse(is.null(useMat$names$useReducedDim), TRUE, FALSE) |
|
334 |
+ if (!is.null(useMat$names$useReducedDim)) { |
|
332 | 335 |
if (nComponents > ncol(mat)) { |
333 | 336 |
warning("Specified number of components more than available, ", |
334 | 337 |
"using all (", ncol(mat), ") components.") |
... | ... |
@@ -336,12 +339,15 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
336 | 339 |
} |
337 | 340 |
mat <- mat[,seq(nComponents)] |
338 | 341 |
} |
339 |
- h <- harmony::HarmonyMatrix(data_mat = mat, meta_data = batch, |
|
342 |
+ h <- harmony::HarmonyMatrix(data_mat = mat, meta_data = batchVec, |
|
340 | 343 |
do_pca = do_pca, npcs = nComponents, |
341 | 344 |
lambda = lambda, theta = theta, |
342 | 345 |
sigma = sigma, max.iter.harmony = nIter, |
343 | 346 |
verbose = verbose, return_object = FALSE, ...) |
344 | 347 |
SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- h |
348 |
+ S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <- |
|
349 |
+ list(useAssay = useAssay, origLogged = TRUE, method = "harmony", |
|
350 |
+ matType = "reducedDim", batch = batch) |
|
345 | 351 |
return(inSCE) |
346 | 352 |
} |
347 | 353 |
|
... | ... |
@@ -349,7 +355,7 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
349 | 355 |
# #' |
350 | 356 |
# #' LIGER relies on integrative non-negative matrix factorization to identify |
351 | 357 |
# #' shared and dataset-specific factors. |
352 |
-# #' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
358 |
+# #' @param Input \linkS4class{SingleCellExperiment} object |
|
353 | 359 |
# #' @param useAssay A single character indicating the name of the assay requiring |
354 | 360 |
# #' batch correction. Default \code{"logcounts"}. |
355 | 361 |
# #' @param batch A single character indicating a field in |
... | ... |
@@ -427,12 +433,12 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
427 | 433 |
#' |
428 | 434 |
#' Limma's batch effect removal function fits a linear model to the data, then |
429 | 435 |
#' removes the component due to the batch effects. |
430 |
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
436 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
431 | 437 |
#' @param useAssay A single character indicating the name of the assay requiring |
432 | 438 |
#' batch correction. Default \code{"logcounts"}. |
433 |
-#' @param batch A single character indicating a field in |
|
434 |
-#' \code{\link{colData}} that annotates the batches. |
|
435 |
-#' Default \code{"batch"}. |
|
439 |
+#' @param batch A single character indicating a field in \code{colData} that |
|
440 |
+#' annotates the batches of each cell; or a vector/factor with the same length |
|
441 |
+#' as the number of cells. Default \code{"batch"}. |
|
436 | 442 |
#' @param assayName A single characeter. The name for the corrected assay. Will |
437 | 443 |
#' be saved to \code{\link{assay}}. Default |
438 | 444 |
#' \code{"LIMMA"}. |
... | ... |
@@ -442,30 +448,19 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, |
442 | 448 |
#' @references Gordon K Smyth, et al., 2003 |
443 | 449 |
#' @examples |
444 | 450 |
#' data('sceBatches', package = 'singleCellTK') |
445 |
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1) |
|
451 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
446 | 452 |
#' sceCorr <- runLimmaBC(sceBatches) |
447 | 453 |
runLimmaBC <- function(inSCE, useAssay = "logcounts", assayName = "LIMMA", |
448 | 454 |
batch = "batch") { |
449 | 455 |
## Input check |
450 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
451 |
- stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
452 |
- } |
|
453 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
454 |
- stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
455 |
- } |
|
456 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
457 |
- stop(paste("\"batch\" name:", batch, "not found.")) |
|
458 |
- } |
|
459 |
- |
|
456 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, returnMatrix = TRUE) |
|
457 |
+ mat <- useMat$mat |
|
458 |
+ batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE) |
|
460 | 459 |
assayName <- gsub(' ', '_', assayName) |
461 |
- |
|
462 | 460 |
## Run algorithm |
463 | 461 |
## One more check for the batch names |
464 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
465 |
- mat <- SummarizedExperiment::assay(inSCE, useAssay) |
|
466 |
- newMat <- limma::removeBatchEffect(mat, batch = batchCol) |
|
462 |
+ newMat <- limma::removeBatchEffect(mat, batch = batchVec) |
|
467 | 463 |
expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <- newMat |
468 |
- #SummarizedExperiment::assay(inSCE, assayName) <- newMat |
|
469 | 464 |
S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- list(useAssay = useAssay, |
470 | 465 |
origLogged = TRUE, |
471 | 466 |
method = "Limma", |
... | ... |
@@ -482,57 +477,61 @@ runLimmaBC <- function(inSCE, useAssay = "logcounts", assayName = "LIMMA", |
482 | 477 |
#' does so by identifying pairs of MNN in the high-dimensional log-expression |
483 | 478 |
#' space. For each MNN pair, a pairwise correction vector is computed by |
484 | 479 |
#' applying a Gaussian smoothing kernel with bandwidth `sigma`. |
485 |
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
480 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
486 | 481 |
#' @param useAssay A single character indicating the name of the assay requiring |
487 | 482 |
#' batch correction. Default \code{"logcounts"}. |
488 |
-#' @param batch A single character indicating a field in |
|
489 |
-#' \code{\link{colData}} that annotates the batches. |
|
490 |
-#' Default \code{"batch"}. |
|
491 |
-#' @param k An integer. Specifies the number of nearest neighbours to |
|
492 |
-#' consider when defining MNN pairs. This should be interpreted as the minimum |
|
493 |
-#' frequency of each cell type or state in each batch. Larger values will |
|
494 |
-#' improve the precision of the correction by increasing the number of MNN |
|
495 |
-#' pairs, at the cost of reducing accuracy by allowing MNN pairs to form between |
|
496 |
-#' cells of different type. Default \code{20L}. |
|
497 |
-#' @param sigma A Numeric scalar. Specifies how much information is |
|
498 |
-#' shared between MNN pairs when computing the batch effect. Larger values will |
|
499 |
-#' share more information, approaching a global correction for all cells in the |
|
500 |
-#' same batch. Smaller values allow the correction to vary across cell types, |
|
501 |
-#' which may be more accurate but comes at the cost of precision. Default |
|
502 |
-#' \code{0.1}. |
|
483 |
+#' @param batch A single character indicating a field in \code{colData} that |
|
484 |
+#' annotates the batches of each cell; or a vector/factor with the same length |
|
485 |
+#' as the number of cells. Default \code{"batch"}. |
|
503 | 486 |
#' @param assayName A single characeter. The name for the corrected assay. Will |
504 | 487 |
#' be saved to \code{\link{assay}}. Default |
505 | 488 |
#' \code{"MNN"}. |
489 |
+#' @param k An integer scalar specifying the number of nearest neighbors to |
|
490 |
+#' consider when identifying MNNs. See "See Also". Default \code{20}. |
|
491 |
+#' @param propK A numeric scalar in (0, 1) specifying the proportion of cells in |
|
492 |
+#' each dataset to use for mutual nearest neighbor searching. See "See Also". |
|
493 |
+#' Default \code{NULL}. |
|
494 |
+#' @param sigma A numeric scalar specifying the bandwidth of the Gaussian |
|
495 |
+#' smoothing kernel used to compute the correction vector for each cell. See |
|
496 |
+#' "See Also". Default \code{0.1}. |
|
497 |
+#' @param cosNormIn A logical scalar indicating whether cosine normalization |
|
498 |
+#' should be performed on the input data prior to calculating distances between |
|
499 |
+#' cells. See "See Also". Default \code{TRUE}. |
|
500 |
+#' @param cosNormOut A logical scalar indicating whether cosine normalization |
|
501 |
+#' should be performed prior to computing corrected expression values. See "See |
|
502 |
+#' Also". Default \code{TRUE}. |
|
503 |
+#' @param varAdj A logical scalar indicating whether variance adjustment should |
|
504 |
+#' be performed on the correction vectors. See "See Also". Default \code{TRUE}. |
|
505 |
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether |
|
506 |
+#' the PCA and nearest-neighbor searches should be parallelized. |
|
506 | 507 |
#' @return The input \linkS4class{SingleCellExperiment} object with |
507 | 508 |
#' \code{assay(inSCE, assayName)} updated. |
509 |
+#' @seealso \code{\link[batchelor]{mnnCorrect}} |
|
508 | 510 |
#' @export |
509 |
-#' @references Lun ATL, et al., 2016 & 2018 |
|
511 |
+#' @references Haghverdi L, Lun ATL, et. al., 2018 |
|
510 | 512 |
#' @examples |
511 | 513 |
#' data('sceBatches', package = 'singleCellTK') |
512 |
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1) |
|
514 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
513 | 515 |
#' sceCorr <- runMNNCorrect(sceBatches) |
514 | 516 |
runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
515 |
- assayName = 'MNN', k = 20L, sigma = 0.1){ |
|
517 |
+ assayName = 'MNN', k = 20L, propK = NULL, |
|
518 |
+ sigma = 0.1, cosNormIn = TRUE, cosNormOut = TRUE, |
|
519 |
+ varAdj = TRUE, BPPARAM = BiocParallel::SerialParam()){ |
|
516 | 520 |
## Input check |
517 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
518 |
- stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
519 |
- } |
|
520 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
521 |
- stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
522 |
- } |
|
523 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
524 |
- stop(paste("\"batch name:", batch, "not found.")) |
|
525 |
- } |
|
521 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, returnMatrix = TRUE) |
|
522 |
+ mat <- useMat$mat |
|
523 |
+ batchVec <- .manageCellVar(inSCE, batch, as.factor = TRUE) |
|
526 | 524 |
assayName <- gsub(' ', '_', assayName) |
527 | 525 |
k <- as.integer(k) |
528 | 526 |
|
529 | 527 |
## Run algorithm |
530 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
531 |
- batchFactor <- as.factor(batchCol) |
|
532 |
- mnnSCE <- batchelor::mnnCorrect(inSCE, assay.type = useAssay, |
|
533 |
- batch = batchFactor, k = k, sigma = sigma) |
|
534 |
- corrected <- SummarizedExperiment::assay(mnnSCE, 'corrected') |
|
535 |
- expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <- corrected |
|
528 |
+ corr.sce <- batchelor::mnnCorrect(mat, batch = batchVec, k = k, |
|
529 |
+ prop.k = propK, sigma = sigma, |
|
530 |
+ cos.norm.in = cosNormIn, |
|
531 |
+ cos.norm.out = cosNormOut, var.adj = varAdj, |
|
532 |
+ BPPARAM = BPPARAM) |
|
533 |
+ expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <- |
|
534 |
+ SummarizedExperiment::assay(corr.sce, "corrected") |
|
536 | 535 |
S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- list(useAssay = useAssay, |
537 | 536 |
origLogged = TRUE, |
538 | 537 |
method = "MNN", |
... | ... |
@@ -547,22 +546,24 @@ runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
547 | 546 |
#' SCANORAMA is analogous to computer vision algorithms for panorama stitching |
548 | 547 |
#' that identify images with overlapping content and merge these into a larger |
549 | 548 |
#' panorama. |
550 |
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
549 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
551 | 550 |
#' @param useAssay A single character indicating the name of the assay requiring |
552 | 551 |
#' batch correction. Scanorama requires a transformed normalized expression |
553 | 552 |
#' assay. Default \code{"logcounts"}. |
554 |
-#' @param batch A single character indicating a field in |
|
555 |
-#' \code{\link{colData}} that annotates the batches. |
|
556 |
-#' Default \code{"batch"}. |
|
553 |
+#' @param batch A single character indicating a field in \code{colData} that |
|
554 |
+#' annotates the batches of each cell; or a vector/factor with the same length |
|
555 |
+#' as the number of cells. Default \code{"batch"}. |
|
556 |
+#' @param assayName A single characeter. The name for the corrected assay. Will |
|
557 |
+#' be saved to \code{\link{assay}}. Default |
|
558 |
+#' \code{"SCANORAMA"}. |
|
557 | 559 |
#' @param SIGMA A numeric scalar. Algorithmic parameter, correction smoothing |
558 | 560 |
#' parameter on Gaussian kernel. Default \code{15}. |
559 | 561 |
#' @param ALPHA A numeric scalar. Algorithmic parameter, alignment score |
560 | 562 |
#' minimum cutoff. Default \code{0.1}. |
561 | 563 |
#' @param KNN An integer. Algorithmic parameter, number of nearest neighbors to |
562 |
-#' use for matching. Default \code{20L}. |
|
563 |
-#' @param assayName A single characeter. The name for the corrected assay. Will |
|
564 |
-#' be saved to \code{\link{assay}}. Default |
|
565 |
-#' \code{"SCANORAMA"}. |
|
564 |
+#' use for matching. Default \code{20}. |
|
565 |
+#' @param approx Boolean. Use approximate nearest neighbors, greatly speeds up |
|
566 |
+#' matching runtime. Default \code{TRUE}. |
|
566 | 567 |
#' @return The input \linkS4class{SingleCellExperiment} object with |
567 | 568 |
#' \code{assay(inSCE, assayName)} updated. |
568 | 569 |
#' @export |
... | ... |
@@ -570,16 +571,13 @@ runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
570 | 571 |
#' @examples |
571 | 572 |
#' \dontrun{ |
572 | 573 |
#' data('sceBatches', package = 'singleCellTK') |
573 |
-#' sceBatches <- scaterlogNormCounts(sceBatches) |
|
574 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
574 | 575 |
#' sceCorr <- runSCANORAMA(sceBatches, "ScaterLogNormCounts") |
575 | 576 |
#' } |
576 | 577 |
runSCANORAMA <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
577 |
- SIGMA = 15, ALPHA = 0.1, KNN = 20L, |
|
578 |
- assayName = 'SCANORAMA'){ |
|
578 |
+ assayName = 'SCANORAMA', SIGMA = 15, ALPHA = 0.1, |
|
579 |
+ KNN = 20, approx = TRUE){ |
|
579 | 580 |
## Input check |
580 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
581 |
- stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
582 |
- } |
|
583 | 581 |
if(!reticulate::py_module_available(module = "scanorama")){ |
584 | 582 |
warning("Cannot find python module 'scanorama', please install Conda and", |
585 | 583 |
" run sctkPythonInstallConda() or run ", |
... | ... |
@@ -593,31 +591,27 @@ runSCANORAMA <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
593 | 591 |
" to select the correct Python environment.") |
594 | 592 |
return(inSCE) |
595 | 593 |
} |
596 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
597 |
- stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found")) |
|
598 |
- } |
|
599 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
600 |
- stop(paste("\"batch\" name:", batch, "not found")) |
|
601 |
- } |
|
594 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay) |
|
595 |
+ batchVec <- .manageCellVar(inSCE, batch) |
|
602 | 596 |
assayName <- gsub(' ', '_', assayName) |
603 | 597 |
adata <- .sce2adata(inSCE, useAssay) |
598 |
+ adata$obs[["batch"]] <- batchVec |
|
604 | 599 |
py <- reticulate::py |
605 | 600 |
py$adata <- adata |
606 |
- py$batch <- batch |
|
607 | 601 |
py$sigma <- SIGMA |
608 | 602 |
py$alpha <- ALPHA |
609 | 603 |
py$KNN <- as.integer(KNN) |
610 | 604 |
reticulate::py_run_string(' |
611 | 605 |
import numpy as np |
612 | 606 |
import scanorama |
613 |
-batches = list(set(adata.obs[batch])) |
|
614 |
-adatas = [adata[adata.obs[batch] == b,] for b in batches] |
|
615 |
-datasets_full = [a.X.toarray() for a in adatas] |
|
607 |
+batches = list(set(adata.obs["batch"])) |
|
608 |
+adatas = [adata[adata.obs["batch"] == b,] for b in batches] |
|
609 |
+datasets_full = [a.X for a in adatas] |
|
616 | 610 |
genes_list = [a.var_names.to_list() for a in adatas] |
617 | 611 |
corrected, genes = scanorama.correct(datasets_full, genes_list, sigma=sigma, |
618 | 612 |
alpha=alpha, knn=KNN) |
619 | 613 |
corrected = [m.toarray() for m in corrected] |
620 |
-cellOrders = [adata.obs[batch] == b for b in batches] |
|
614 |
+cellOrders = [adata.obs["batch"] == b for b in batches] |
|
621 | 615 |
integrated = np.zeros(adata.shape) |
622 | 616 |
integrated[:,:] = np.NAN |
623 | 617 |
for i in range(len(batches)): |
... | ... |
@@ -632,6 +626,9 @@ integrated = integrated[:, orderIdx] |
632 | 626 |
rownames(mat) <- rownames(inSCE) |
633 | 627 |
colnames(mat) <- colnames(inSCE) |
634 | 628 |
expData(inSCE, assayName, tag = "batchCorrected", altExp = FALSE) <- mat |
629 |
+ S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- |
|
630 |
+ list(useAssay = useAssay, origLogged = TRUE, method = "Scanorama", |
|
631 |
+ matType = "assay", batch = batch) |
|
635 | 632 |
return(inSCE) |
636 | 633 |
} |
637 | 634 |
|
... | ... |
@@ -640,12 +637,16 @@ integrated = integrated[:, orderIdx] |
640 | 637 |
#' The scMerge method leverages factor analysis, stably expressed genes (SEGs) |
641 | 638 |
#' and (pseudo-) replicates to remove unwanted variations and merge multiple |
642 | 639 |
#' scRNA-Seq data. |
643 |
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
640 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
644 | 641 |
#' @param useAssay A single character indicating the name of the assay requiring |
645 | 642 |
#' batch correction. Default \code{"logcounts"}. |
646 | 643 |
#' @param batch A single character indicating a field in |
647 | 644 |
#' \code{\link{colData}} that annotates the batches. |
648 | 645 |
#' Default \code{"batch"}. |
646 |
+#' @param assayName A single characeter. The name for the corrected assay. Will |
|
647 |
+#' be saved to \code{\link{assay}}. Default \code{"scMerge"}. |
|
648 |
+#' @param hvgExprs A single characeter. The assay that to be used for highly |
|
649 |
+#' variable genes identification. Default \code{"counts"}. |
|
649 | 650 |
#' @param kmeansK An integer vector. Indicating the kmeans' K-value for each |
650 | 651 |
#' batch (i.e. how many subclusters in each batch should exist), in order to |
651 | 652 |
#' construct pseudo-replicates. The length of code{kmeansK} needs to be the same |
... | ... |
@@ -656,14 +657,12 @@ integrated = integrated[:, orderIdx] |
656 | 657 |
#' \code{'cell_type'}. |
657 | 658 |
#' @param seg A vector of gene names or indices that specifies SEG (Stably |
658 | 659 |
#' Expressed Genes) set as negative control. Pre-defined dataset with human and |
659 |
-#' mouse SEG lists is available to user by running \code{data('SEG')}. Default |
|
660 |
+#' mouse SEG lists is available with \code{\link[scMerge]{segList}} or |
|
661 |
+#' \code{\link[scMerge]{segList_ensemblGeneID}}. Default |
|
660 | 662 |
#' \code{NULL}, and this value will be auto-detected by default with |
661 | 663 |
#' \code{\link[scMerge]{scSEGIndex}}. |
662 |
-#' @param nCores An integer. The number of cores of processors to allocate for |
|
663 |
-#' the task. Default \code{1L}. |
|
664 |
-#' @param assayName A single characeter. The name for the corrected assay. Will |
|
665 |
-#' be saved to \code{\link{assay}}. Default |
|
666 |
-#' \code{"scMerge"}. |
|
664 |
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether |
|
665 |
+#' should be parallelized. Default \code{BiocParallel::SerialParam()}. |
|
667 | 666 |
#' @return The input \linkS4class{SingleCellExperiment} object with |
668 | 667 |
#' \code{assay(inSCE, assayName)} updated. |
669 | 668 |
#' @export |
... | ... |
@@ -671,53 +670,33 @@ integrated = integrated[:, orderIdx] |
671 | 670 |
#' @examples |
672 | 671 |
#' data('sceBatches', package = 'singleCellTK') |
673 | 672 |
#' \dontrun{ |
674 |
-#' logcounts(sceBatches) <- log(counts(sceBatches) + 1) |
|
673 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
675 | 674 |
#' sceCorr <- runSCMerge(sceBatches) |
676 | 675 |
#' } |
677 | 676 |
runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
678 |
- assayName = "scMerge", seg = NULL, kmeansK = NULL, |
|
679 |
- cellType = 'cell_type', |
|
680 |
- nCores = 1L){ |
|
677 |
+ assayName = "scMerge", hvgExprs = "counts", seg = NULL, |
|
678 |
+ kmeansK = NULL, cellType = NULL, |
|
679 |
+ BPPARAM = BiocParallel::SerialParam()){ |
|
681 | 680 |
## Input check |
682 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
683 |
- stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
684 |
- } |
|
685 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
686 |
- stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
687 |
- } |
|
688 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
689 |
- stop(paste("\"batch\" name:", batch, "not found")) |
|
690 |
- } |
|
691 |
- if(is.null(cellType) & is.null(kmeansK)){ |
|
692 |
- stop("\"cellType\" and \"kmeansK\" cannot be NULL at the same time") |
|
693 |
- } |
|
694 |
- if(!is.null(cellType) && |
|
695 |
- !cellType %in% names(SummarizedExperiment::colData(inSCE))){ |
|
696 |
- # If NULL, scMerge still works |
|
697 |
- stop(paste("\"cellType\" name:", cellType, "not found")) |
|
698 |
- } |
|
699 |
- |
|
700 |
- nCores <- min(as.integer(nCores), parallel::detectCores()) |
|
681 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, returnMatrix = TRUE) |
|
682 |
+ .selectSCEMatrix(inSCE, useAssay = hvgExprs, returnMatrix = FALSE) |
|
683 |
+ mat <- useMat$mat |
|
684 |
+ batchVec <- .manageCellVar(inSCE, batch) |
|
685 |
+ cellTypeCol <- .manageCellVar(inSCE, cellType) |
|
686 |
+ #if(is.null(cellType) & is.null(kmeansK)){ |
|
687 |
+ # stop("\"cellType\" and \"kmeansK\" cannot be NULL at the same time") |
|
688 |
+ #} |
|
701 | 689 |
assayName <- gsub(' ', '_', assayName) |
702 | 690 |
|
703 | 691 |
## Run algorithm |
692 |
+ uniqBatch <- unique(batch) |
|
704 | 693 |
|
705 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
706 |
- uniqBatch <- unique(batchCol) |
|
707 |
- |
|
708 |
- # Infer parameters |
|
709 |
- if(is.null(cellType)){ |
|
710 |
- cellTypeCol <- NULL |
|
711 |
- } else { |
|
712 |
- cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]] |
|
713 |
- } |
|
714 | 694 |
## kmeansK |
715 | 695 |
if(!is.null(cellType) && is.null(kmeansK)){ |
716 | 696 |
# If kmeansK not given, detect by cell type. |
717 |
- cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]] |
|
718 | 697 |
kmeansK <- c() |
719 | 698 |
for (i in seq_along(uniqBatch)){ |
720 |
- cellTypePerBatch <- cellTypeCol[batchCol == uniqBatch[i]] |
|
699 |
+ cellTypePerBatch <- cellType[batchVec == uniqBatch[i]] |
|
721 | 700 |
kmeansK <- c(kmeansK, length(unique(cellTypePerBatch))) |
722 | 701 |
} |
723 | 702 |
cat("Detected kmeansK:\n") |
... | ... |
@@ -725,37 +704,25 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
725 | 704 |
} |
726 | 705 |
## SEG |
727 | 706 |
if(is.null(seg)){ |
728 |
- bpParam <- BiocParallel::MulticoreParam(workers = nCores) |
|
729 |
- seg <- scMerge::scSEGIndex(SummarizedExperiment::assay(inSCE, useAssay), |
|
730 |
- cell_type = cellTypeCol, |
|
731 |
- BPPARAM = bpParam) |
|
732 |
- ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[seq_len(1000)],]) |
|
707 |
+ seg <- scMerge::scSEGIndex(mat, cell_type = cellTypeCol, BPPARAM = BPPARAM) |
|
708 |
+ ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[seq(1000)],]) |
|
733 | 709 |
} else { |
734 | 710 |
ctl <- seg |
735 | 711 |
} |
736 | 712 |
|
737 |
- # scMerge automatically search for the column called "batch"... |
|
738 |
- colDataNames <- names(SummarizedExperiment::colData(inSCE)) |
|
739 |
- names(SummarizedExperiment::colData(inSCE))[colDataNames == batch] <- 'batch' |
|
740 |
- bpParam <- BiocParallel::MulticoreParam(workers = nCores) |
|
741 | 713 |
inSCE <- scMerge::scMerge(sce_combine = inSCE, exprs = useAssay, |
742 |
- hvg_exprs = useAssay, |
|
714 |
+ hvg_exprs = useAssay, batch_name = batch, |
|
743 | 715 |
assay_name = assayName, |
744 | 716 |
ctl = ctl, kmeansK = kmeansK, |
745 | 717 |
#marker_list = topVarGenesPerBatch, |
746 | 718 |
cell_type = cellTypeCol, |
747 |
- BPPARAM = bpParam) |
|
748 |
- colDataNames <- names(SummarizedExperiment::colData(inSCE)) |
|
749 |
- names(SummarizedExperiment::colData(inSCE))[colDataNames == 'batch'] <- batch |
|
719 |
+ BPPARAM = BPPARAM) |
|
750 | 720 |
# scMerge's function automatically returns the SCE object with information |
751 | 721 |
# completed, thus using this helper function to simply add the tag. |
752 | 722 |
inSCE <- expSetDataTag(inSCE, "batchCorrected", assayName) |
753 |
- S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- list(useAssay = useAssay, |
|
754 |
- origLogged = TRUE, |
|
755 |
- method = "scMerge", |
|
756 |
- matType = "assay", |
|
757 |
- batch = batch, |
|
758 |
- condition = cellType) |
|
723 |
+ S4Vectors::metadata(inSCE)$batchCorr[[assayName]] <- |
|
724 |
+ list(useAssay = useAssay, origLogged = TRUE, method = "scMerge", |
|
725 |
+ matType = "assay", batch = batch, condition = cellType) |
|
759 | 726 |
return(inSCE) |
760 | 727 |
} |
761 | 728 |
|
... | ... |
@@ -766,7 +733,7 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
766 | 733 |
#' model accounts for zero inflation (dropouts), over-dispersion, and the count |
767 | 734 |
#' nature of the data. The model also accounts for the difference in library |
768 | 735 |
#' sizes and optionally for batch effects and/or other covariates. |
769 |
-#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. Required. |
|
736 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object |
|
770 | 737 |
#' @param useAssay A single character indicating the name of the assay requiring |
771 | 738 |
#' batch correction. Note that ZINBWaVE works for counts (integer) input rather |
772 | 739 |
#' than logcounts that other methods prefer. Default \code{"counts"}. |
... | ... |
@@ -785,6 +752,8 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
785 | 752 |
#' @param reducedDimName A single character. The name for the corrected |
786 | 753 |
#' low-dimensional representation. Will be saved to \code{reducedDim(inSCE)}. |
787 | 754 |
#' Default \code{"zinbwave"}. |
755 |
+#' @param BPPARAM A \linkS4class{BiocParallelParam} object specifying whether |
|
756 |
+#' should be parallelized. Default \code{BiocParallel::SerialParam()}. |
|
788 | 757 |
#' @return The input \linkS4class{SingleCellExperiment} object with |
789 | 758 |
#' \code{reducedDim(inSCE, reducedDimName)} updated. |
790 | 759 |
#' @export |
... | ... |
@@ -796,7 +765,8 @@ runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
796 | 765 |
#' } |
797 | 766 |
runZINBWaVE <- function(inSCE, useAssay = 'counts', batch = 'batch', |
798 | 767 |
nHVG = 1000L, nComponents = 50L, epsilon = 1000, |
799 |
- nIter = 10L, reducedDimName = 'zinbwave'){ |
|
768 |
+ nIter = 10L, reducedDimName = 'zinbwave', |
|
769 |
+ BPPARAM = BiocParallel::SerialParam()){ |
|
800 | 770 |
## Input check |
801 | 771 |
if(!inherits(inSCE, "SingleCellExperiment")){ |
802 | 772 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
... | ... |
@@ -819,16 +789,24 @@ runZINBWaVE <- function(inSCE, useAssay = 'counts', batch = 'batch', |
819 | 789 |
vars <- matrixStats::rowVars(logAssay) |
820 | 790 |
names(vars) <- rownames(inSCE) |
821 | 791 |
vars <- sort(vars, decreasing = TRUE) |
822 |
- tmpSCE <- inSCE[names(vars)[seq_len(nHVG)],] |
|
792 |
+ hvgs <- names(vars)[seq_len(nHVG)] |
|
793 |
+ #tmpSCE <- inSCE[names(vars)[seq_len(nHVG)],] |
|
823 | 794 |
} else { |
824 |
- tmpSCE <- inSCE |
|
795 |
+ #tmpSCE <- inSCE |
|
796 |
+ hvgs <- rownames(inSCE) |
|
825 | 797 |
} |
826 |
- epsilon <- min(nrow(tmpSCE), epsilon) |
|
827 |
- tmpSCE <- zinbwave::zinbwave(tmpSCE, K = nComponents, epsilon = epsilon, |
|
828 |
- which_assay = useAssay, |
|
798 |
+ epsilon <- min(nrow(inSCE), epsilon) |
|
799 |
+ inSCE <- zinbwave::zinbwave(inSCE, K = nComponents, epsilon = epsilon, |
|
800 |
+ which_assay = useAssay, which_genes = hvgs, |
|
829 | 801 |
X = paste('~', batch, sep = ''), |
830 |
- maxiter.optimize = nIter, verbose = TRUE) |
|
831 |
- SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- |
|
832 |
- SingleCellExperiment::reducedDim(tmpSCE, 'zinbwave') |
|
802 |
+ maxiter.optimize = nIter, verbose = TRUE, |
|
803 |
+ BPPARAM = BPPARAM) |
|
804 |
+ reddimName <- reducedDimNames(inSCE) |
|
805 |
+ reddimName[reddimName == "zinbwave"] <- reducedDimName |
|
806 |
+ #SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- |
|
807 |
+ # SingleCellExperiment::reducedDim(inSCE, 'zinbwave') |
|
808 |
+ S4Vectors::metadata(inSCE)$batchCorr[[reducedDimName]] <- |
|
809 |
+ list(useAssay = useAssay, origLogged = FALSE, method = "ZINBWaVE", |
|
810 |
+ matType = "reducedDim", batch = batch) |
|
833 | 811 |
return(inSCE) |
834 | 812 |
} |
... | ... |
@@ -38,24 +38,17 @@ |
38 | 38 |
classGroup2 = NULL, groupName1, groupName2, |
39 | 39 |
analysisName, covariates = NULL, overwrite = FALSE){ |
40 | 40 |
# Input checks |
41 |
- if(!inherits(inSCE, "SingleCellExperiment")){ |
|
42 |
- stop('"inSCE" should be a SingleCellExperiment inherited Object.') |
|
43 |
- } |
|
44 |
- if (is.null(useAssay) & is.null(useReducedDim)) { |
|
45 |
- stop("`useAssay` or `useReducedDim` must be specified.") |
|
46 |
- } else { |
|
47 |
- if (!is.null(useReducedDim)) { |
|
48 |
- if(!useReducedDim %in% SingleCellExperiment::reducedDimNames(inSCE)){ |
|
49 |
- stop(paste('"useReducedDim" name: ', useReducedDim, ' not found.')) |
|
50 |
- } |
|
51 |
- useAssay <- NULL |
|
52 |
- } else { |
|
53 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)){ |
|
54 |
- stop(paste('"useAssay" name: ', useAssay, ' not found.')) |
|
41 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
42 |
+ useReducedDim = useReducedDim, |
|
43 |
+ returnMatrix = FALSE) |
|
44 |
+ if ("diffExp" %in% names(S4Vectors::metadata(inSCE))){ |
|
45 |
+ if(analysisName %in% names(S4Vectors::metadata(inSCE)$diffExp)){ |
|
46 |
+ if(!isTRUE(overwrite)){ |
|
47 |
+ stop("analysisName '", analysisName, "' already exists. ", |
|
48 |
+ "Set `overwrite` to `TRUE` to overwrite.") |
|
55 | 49 |
} |
56 | 50 |
} |
57 | 51 |
} |
58 |
- |
|
59 | 52 |
if(is.null(index1) && (is.null(classGroup1) || is.null(class))){ |
60 | 53 |
stop('At least "index1" or "classGroup1" should be specified.') |
61 | 54 |
} else if(!is.null(index1) && (!is.null(classGroup1) || !is.null(class))){ |
... | ... |
@@ -66,14 +59,7 @@ |
66 | 59 |
!all(covariates %in% names(SummarizedExperiment::colData(inSCE)))){ |
67 | 60 |
stop("Not all specified covariates exist.") |
68 | 61 |
} |
69 |
- if ("diffExp" %in% names(S4Vectors::metadata(inSCE))){ |
|
70 |
- if(analysisName %in% names(S4Vectors::metadata(inSCE)$diffExp)){ |
|
71 |
- if(!isTRUE(overwrite)){ |
|
72 |
- stop("analysisName '", analysisName, "' already exists. ", |
|
73 |
- "Set `overwrite` to `TRUE` to overwrite.") |
|
74 |
- } |
|
75 |
- } |
|
76 |
- } |
|
62 |
+ |
|
77 | 63 |
groupNames <- c(groupName1, groupName2) |
78 | 64 |
annotation <- c(class, covariates) |
79 | 65 |
if(!is.null(index1)){ |
... | ... |
@@ -84,18 +70,7 @@ |
84 | 70 |
cells2 <- sort(setdiff(colnames(inSCE), cells1)) |
85 | 71 |
} |
86 | 72 |
} else { |
87 |
- if(length(class) == 1 && inherits(class, "character")){ |
|
88 |
- if(!class %in% names(SummarizedExperiment::colData(inSCE))){ |
|
89 |
- stop("class: '", class, "' not found.") |
|
90 |
- } |
|
91 |
- class <- SummarizedExperiment::colData(inSCE)[[class]] |
|
92 |
- } else { |
|
93 |
- if(!length(class) == ncol(inSCE)){ |
|
94 |
- stop("Length of given `class` vector should equal to ", |
|
95 |
- "`ncol(inSCE)`; Or specify a column name of ", |
|
96 |
- "`colData(inSCE)`") |
|
97 |
- } |
|
98 |
- } |
|
73 |
+ class <- .manageCellVar(inSCE, var = class) |
|
99 | 74 |
uniqCats <- unique(as.vector(class)) |
100 | 75 |
index1 <- class %in% classGroup1 |
101 | 76 |
if(is.null(classGroup2)){ |
... | ... |
@@ -129,8 +104,8 @@ |
129 | 104 |
} |
130 | 105 |
} |
131 | 106 |
return( |
132 |
- list(useAssay = useAssay, |
|
133 |
- useReducedDim = useReducedDim, |
|
107 |
+ list(useAssay = useMat$names$useAssay, |
|
108 |
+ useReducedDim = useMat$names$useReducedDim, |
|
134 | 109 |
groupNames = groupNames, |
135 | 110 |
select = select, |
136 | 111 |
annotation = annotation) |
... | ... |
@@ -702,7 +677,7 @@ runWilcox <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, |
702 | 677 |
if (!is.null(useAssay)) { |
703 | 678 |
mat <- expData(inSCE, useAssay) |
704 | 679 |
} else { |
705 |
- mat <- t(expData(inSCE, useReducedDim)) |
|
680 |
+ mat <- t(SingleCellExperiment::reducedDim(inSCE, useReducedDim)) |
|
706 | 681 |
} |
707 | 682 |
if (isTRUE(verbose)) { |
708 | 683 |
message(date(), " ... Running DE with wilcox, Analysis name: ", |
... | ... |
@@ -1,65 +1,3 @@ |
1 |
-.matrixTypeCheck <- function(inSCE, redDimType = NULL, |
|
2 |
- useAssay = NULL, useReducedDim = NULL, |
|
3 |
- useAltExp = NULL) { |
|
4 |
- # Helper function for checking if the specified matrix type is valid |
|
5 |
- if (!inherits(inSCE, "SingleCellExperiment")){ |
|
6 |
- stop("Please use a SingleCellExperiment object") |
|
7 |
- } |
|
8 |
- if (redDimType == "embedding") { |
|
9 |
- if (is.null(useAssay) && is.null(useReducedDim)) { |
|
10 |
- stop("`useAssay` and `useReducedDim` cannot be NULL at the same time.") |
|
11 |
- } else if (!is.null(useAssay) && !is.null(useReducedDim)) { |
|
12 |
- stop("`useAssay` and `useReducedDim` cannot be specified at the same time.") |
|
13 |
- } else { |
|
14 |
- if (!is.null(useReducedDim)) { |
|
15 |
- if (!useReducedDim %in% SingleCellExperiment::reducedDimNames(inSCE)) { |
|
16 |
- stop("Specified `useReducedDim` not found.") |
|
17 |
- } |
|
18 |
- if (!is.null(useAltExp)) { |
|
19 |
- warning("`useAltExp` will be ignored when using `useReducedDim`.") |
|
20 |
- } |
|
21 |
- sce <- inSCE |
|
22 |
- } else { |
|
23 |
- if (!is.null(useAltExp)) { |
|
24 |
- if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) { |
|
25 |
- stop("Specified `useAltExp` not found.") |
|
26 |
- } |
|
27 |
- sce <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
28 |
- if (!useAssay %in% SummarizedExperiment::assayNames(sce)) { |
|
29 |
- stop("Specified `useAssay` not found in `useAltExp`.") |
|
30 |
- } |
|
31 |
- } else { |
|
32 |
- if (!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
33 |
- stop("Specified `useAssay` not found.") |
|
34 |
- } |
|
35 |
- sce <- inSCE |
|
36 |
- } |
|
37 |
- } |
|
38 |
- } |
|
39 |
- } else if (redDimType == "linear") { |
|
40 |
- if (!is.null(useReducedDim)) { |
|
41 |
- stop("Currently `useReducedDim` is not allowed for linear dimension reduction.") |
|
42 |
- } |
|
43 |
- if (is.null(useAssay)) { |
|
44 |
- stop("`useAssay` cannot be NULL for linear dimension reduction") |
|
45 |
- } else { |
|
46 |
- if (is.null(useAltExp)) { |
|
47 |
- if (!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
48 |
- stop("Specified `useAssay` not found.") |
|
49 |
- } |
|
50 |
- } else { |
|
51 |
- if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) { |
|
52 |
- stop("Specified `useAltExp` not found.") |
|
53 |
- } |
|
54 |
- sce <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
55 |
- if (!useAssay %in% SummarizedExperiment::assayNames(sce)) { |
|
56 |
- stop("Specified `useAssay` not found in `useAltExp`.") |
|
57 |
- } |
|
58 |
- } |
|
59 |
- } |
|
60 |
- } |
|
61 |
-} |
|
62 |
- |
|
63 | 1 |
#' Generic Wrapper function for running dimensionality reduction |
64 | 2 |
#' @details Wrapper function to run one of the available dimensionality |
65 | 3 |
#' reduction algorithms integrated within SCTK from \code{\link{scaterPCA}}, |
... | ... |
@@ -81,9 +19,9 @@ |
81 | 19 |
#' @param useReducedDim The low dimension representation to use for embedding |
82 | 20 |
#' computation. Default \code{NULL}. |
83 | 21 |
#' @param reducedDimName The name of the result matrix. Required. |
84 |
-#' @param useFeatureSubset Subset of feature to use for dimension reduction. A |
|
22 |
+#' @param useFeatureSubset Subset of feature to use for dimension reduction. A |
|
85 | 23 |
#' character string indicating a \code{rowData} variable that stores the logical |
86 |
-#' vector of HVG selection, or a vector that can subset the rows of |
|
24 |
+#' vector of HVG selection, or a vector that can subset the rows of |
|
87 | 25 |
#' \code{inSCE}. Default \code{NULL}. |
88 | 26 |
#' @param scale Logical scalar, whether to standardize the expression values. |
89 | 27 |
#' Default \code{TRUE}. |
... | ... |
@@ -115,39 +53,43 @@ runDimReduce <- function(inSCE, |
115 | 53 |
"scaterUMAP", |
116 | 54 |
"seuratUMAP"), |
117 | 55 |
useAssay = NULL, useReducedDim = NULL, |
118 |
- useAltExp = NULL, reducedDimName = method, |
|
119 |
- nComponents = 20, useFeatureSubset = NULL, |
|
120 |
- scale = FALSE, seed = NULL, ...) |
|
56 |
+ useAltExp = NULL, reducedDimName = method, |
|
57 |
+ nComponents = 20, useFeatureSubset = NULL, |
|
58 |
+ scale = FALSE, seed = NULL, ...) |
|
121 | 59 |
{ |
122 | 60 |
|
123 | 61 |
method <- match.arg(method) |
124 | 62 |
args <- list(...) |
125 |
- |
|
126 |
- if (method %in% c("scaterPCA", "seuratPCA", "seuratICA")) { |
|
127 |
- .matrixTypeCheck(inSCE, "linear", useAssay, useReducedDim, useAltExp) |
|
128 |
- } else { |
|
129 |
- .matrixTypeCheck(inSCE, "embedding", useAssay, useReducedDim, useAltExp) |
|
63 |
+ if (method %in% c("scaterPCA", "seuratPCA", "seuratICA") & |
|
64 |
+ !is.null(useReducedDim)) { |
|
65 |
+ stop("`useReducedDim` is not allowed for linear dimension reduction.") |
|
130 | 66 |
} |
131 | 67 |
|
132 | 68 |
if (method == "scaterPCA") { |
133 |
- inSCE <- scaterPCA(inSCE = inSCE, useAssay = useAssay, |
|
134 |
- useAltExp = useAltExp, reducedDimName = reducedDimName, |
|
135 |
- nComponents = nComponents, |
|
136 |
- useFeatureSubset = useFeatureSubset, scale = scale, |
|
69 |
+ inSCE <- scaterPCA(inSCE = inSCE, useAssay = useAssay, |
|
70 |
+ useAltExp = useAltExp, reducedDimName = reducedDimName, |
|
71 |
+ nComponents = nComponents, |
|
72 |
+ useFeatureSubset = useFeatureSubset, scale = scale, |
|
137 | 73 |
seed = seed, ...) |
138 | 74 |
} else if (method == "scaterUMAP") { |
139 | 75 |
inSCE <- getUMAP(inSCE = inSCE, useAssay = useAssay, useAltExp = useAltExp, |
140 |
- useReducedDim = useReducedDim, |
|
141 |
- useFeatureSubset = useFeatureSubset, scale = scale, |
|
76 |
+ useReducedDim = useReducedDim, |
|
77 |
+ useFeatureSubset = useFeatureSubset, scale = scale, |
|
142 | 78 |
reducedDimName = reducedDimName, seed = seed, ...) |
143 | 79 |
} else if (method == "rTSNE") { |
144 | 80 |
inSCE <- getTSNE(inSCE = inSCE, useAssay = useAssay, useAltExp = useAltExp, |
145 |
- useReducedDim = useReducedDim, |
|
146 |
- useFeatureSubset = useFeatureSubset, scale = scale, |
|
81 |
+ useReducedDim = useReducedDim, |
|
82 |
+ useFeatureSubset = useFeatureSubset, scale = scale, |
|
147 | 83 |
reducedDimName = reducedDimName, seed = seed, ...) |
148 | 84 |
} else { |
149 | 85 |
# Seurat part |
150 |
- |
|
86 |
+ # TODO: Honestly, the input checks should have been implemented for |
|
87 |
+ # functions being wrapped because they are being exposed to users as well. |
|
88 |
+ # We should not being performing redundant checks when wrapping them again. |
|
89 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
90 |
+ useReducedDim = useReducedDim, |
|
91 |
+ useAltExp = useAltExp, returnMatrix = FALSE) |
|
92 |
+ useAssay <- useMat$names$useAssay |
|
151 | 93 |
if (!is.null(useAltExp)) { |
152 | 94 |
tempSCE <- SingleCellExperiment::altExp(inSCE, useAltExp) |
153 | 95 |
} else if (!is.null(useAssay)) { |
... | ... |
@@ -157,17 +99,17 @@ runDimReduce <- function(inSCE, |
157 | 99 |
## SeuratPCA/ICA |
158 | 100 |
if (method == "seuratPCA") { |
159 | 101 |
message(paste0(date(), " ... Computing Seurat PCA.")) |
160 |
- tempSCE <- runSeuratPCA(tempSCE, useAssay = useAssay, |
|
102 |
+ tempSCE <- runSeuratPCA(tempSCE, useAssay = useAssay, |
|
161 | 103 |
reducedDimName = reducedDimName, |
162 |
- nPCs = nComponents, |
|
104 |
+ nPCs = nComponents, |
|
163 | 105 |
useFeatureSubset = useFeatureSubset, |
164 | 106 |
scale = scale, seed = seed, ...) |
165 | 107 |
} else if (method == "seuratICA") { |
166 | 108 |
message(paste0(date(), " ... Computing Seurat ICA.")) |
167 | 109 |
tempSCE <- runSeuratICA(tempSCE, useAssay = useAssay, |
168 | 110 |
reducedDimName = reducedDimName, |
169 |
- nics = nComponents, |
|
170 |
- useFeatureSubset = useFeatureSubset, |
|
111 |
+ nics = nComponents, |
|
112 |
+ useFeatureSubset = useFeatureSubset, |
|
171 | 113 |
scale = scale, seed = seed, ...) |
172 | 114 |
} |
173 | 115 |
seuratObj <- tempSCE@metadata$seurat |
... | ... |
@@ -187,24 +129,24 @@ runDimReduce <- function(inSCE, |
187 | 129 |
message(paste0(date(), " ... Computing Seurat PCA.")) |
188 | 130 |
tempSCE <- runSeuratPCA(inSCE = tempSCE, |
189 | 131 |
useAssay = useAssay, |
190 |
- reducedDimName = paste0(useAssay, "_seuratPCA"), |
|
132 |
+ reducedDimName = paste0(useAssay, "_seuratPCA"), |
|
191 | 133 |
useFeatureSubset = useFeatureSubset, seed = seed) |
192 | 134 |
} else if (args$useReduction == "ica") { |
193 | 135 |
message(paste0(date(), " ... Computing Seurat ICA.")) |
194 | 136 |
tempSCE <- runSeuratICA(inSCE = tempSCE, |
195 | 137 |
useAssay = useAssay, |
196 |
- reducedDimName = paste0(useAssay, "_seuratICA"), |
|
138 |
+ reducedDimName = paste0(useAssay, "_seuratICA"), |
|
197 | 139 |
useFeatureSubset = useFeatureSubset, seed = seed) |
198 | 140 |
} |
199 | 141 |
if (method == "seuratUMAP") { |
200 | 142 |
message(paste0(date(), " ... Computing Seurat UMAP.")) |
201 |
- tempSCE <- runSeuratUMAP(inSCE = tempSCE, |
|
202 |
- reducedDimName = reducedDimName, |
|
143 |
+ tempSCE <- runSeuratUMAP(inSCE = tempSCE, |
|
144 |
+ reducedDimName = reducedDimName, |
|
203 | 145 |
seed = seed, ...) |
204 | 146 |
} else { |
205 | 147 |
message(paste0(date(), " ... Computing Seurat tSNE.")) |
206 | 148 |
tempSCE <- runSeuratTSNE(inSCE = tempSCE, |
207 |
- reducedDimName = reducedDimName, |
|
149 |
+ reducedDimName = reducedDimName, |
|
208 | 150 |
seed = seed, ...) |
209 | 151 |
} |
210 | 152 |
} else { |
... | ... |
@@ -227,13 +169,13 @@ runDimReduce <- function(inSCE, |
227 | 169 |
# hard-code useReduction="pca" |
228 | 170 |
message(paste0(date(), " ... Computing Seurat UMAP.")) |
229 | 171 |
tempSCE <- runSeuratUMAP(inSCE = tempSCE, useReduction = "pca", |
230 |
- reducedDimName = reducedDimName, |
|
172 |
+ reducedDimName = reducedDimName, |
|
231 | 173 |
seed = seed, ...) |
232 | 174 |
} else { |
233 | 175 |
# hard-code useReduction="pca" |
234 | 176 |
message(paste0(date(), " ... Computing Seurat tSNE.")) |
235 | 177 |
tempSCE <- runSeuratTSNE(inSCE = tempSCE, useReduction = "pca", |
236 |
- reducedDimName = reducedDimName, |
|
178 |
+ reducedDimName = reducedDimName, |
|
237 | 179 |
seed = seed, ...) |
238 | 180 |
} |
239 | 181 |
} |
... | ... |
@@ -118,11 +118,7 @@ runSingleR <- function(inSCE, |
118 | 118 |
"Using its default labeling.") |
119 | 119 |
} |
120 | 120 |
} |
121 |
- if (is.null(labelByCluster)) { |
|
122 |
- clusters <- NULL |
|
123 |
- } else { |
|
124 |
- clusters <- inSCE[[labelByCluster]] |
|
125 |
- } |
|
121 |
+ clusters <- .manageCellVar(inSCE, var = labelByCluster) |
|
126 | 122 |
# predictions <- SingleR::SingleR(test = inSCE, assay.type.test = useAssay, |
127 | 123 |
# ref = ref, clusters = clusters, |
128 | 124 |
# |